{ "cells": [ { "cell_type": "markdown", "id": "5f947b1b", "metadata": {}, "source": [ "## Imports:" ] }, { "cell_type": "code", "execution_count": 45, "id": "520a3060", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: pip in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (21.2.4)\n", "Collecting pip\n", " Using cached pip-25.3-py3-none-any.whl (1.8 MB)\n", "Requirement already satisfied: setuptools in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (58.0.4)\n", "Collecting setuptools\n", " Using cached setuptools-80.9.0-py3-none-any.whl (1.2 MB)\n", "Requirement already satisfied: wheel in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (0.37.0)\n", "Collecting wheel\n", " Using cached wheel-0.45.1-py3-none-any.whl (72 kB)\n", "Installing collected packages: wheel, setuptools, pip\n", " Attempting uninstall: wheel\n", " Found existing installation: wheel 0.37.0\n", " Uninstalling wheel-0.37.0:\n", " Successfully uninstalled wheel-0.37.0\n", " Attempting uninstall: setuptools\n", " Found existing installation: setuptools 58.0.4\n", " Uninstalling setuptools-58.0.4:\n", " Successfully uninstalled setuptools-58.0.4\n", " Attempting uninstall: pip\n", " Found existing installation: pip 21.2.4\n", " Uninstalling pip-21.2.4:\n", " Successfully uninstalled pip-21.2.4\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", "spyder 5.1.5 requires pyqt5<5.13, which is not installed.\n", "spyder 5.1.5 requires pyqtwebengine<5.13, which is not installed.\n", "conda-repo-cli 1.0.4 requires pathlib, which is not installed.\n", "anaconda-project 0.10.1 requires ruamel-yaml, which is not installed.\n", "numba 0.54.1 requires numpy<1.21,>=1.17, but you have numpy 2.0.2 which is incompatible.\u001b[0m\n", "Successfully installed pip-25.3 setuptools-80.9.0 wheel-0.45.1\n" ] } ], "source": [ "!pip install --upgrade pip setuptools wheel" ] }, { "cell_type": "code", "execution_count": 46, "id": "75f54e63", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: langchain_core in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (0.3.80)\n", "Requirement already satisfied: langchain_community in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (0.3.31)\n", "Requirement already satisfied: langchain_ollama in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (0.3.10)\n", "Requirement already satisfied: langchain_groq in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (0.3.8)\n", "Requirement already satisfied: groq in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (0.36.0)\n", "Requirement already satisfied: pymupdf in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (1.26.5)\n", "Collecting faiss-cpu==1.12.0\n", " Using cached faiss_cpu-1.12.0-cp39-cp39-macosx_13_0_x86_64.whl.metadata (5.1 kB)\n", "Requirement already satisfied: numpy<3.0,>=1.25.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from faiss-cpu==1.12.0) (2.0.2)\n", "Requirement already satisfied: packaging in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from faiss-cpu==1.12.0) (25.0)\n", "Requirement already satisfied: langsmith<1.0.0,>=0.3.45 in 
/Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langchain_core) (0.4.37)\n", "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langchain_core) (9.1.2)\n", "Requirement already satisfied: jsonpatch<2.0.0,>=1.33.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langchain_core) (1.33)\n", "Requirement already satisfied: PyYAML<7.0.0,>=5.3.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langchain_core) (6.0)\n", "Requirement already satisfied: typing-extensions<5.0.0,>=4.7.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langchain_core) (4.15.0)\n", "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langchain_core) (2.12.4)\n", "Requirement already satisfied: jsonpointer>=1.9 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from jsonpatch<2.0.0,>=1.33.0->langchain_core) (3.0.0)\n", "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langsmith<1.0.0,>=0.3.45->langchain_core) (0.28.1)\n", "Requirement already satisfied: orjson>=3.9.14 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langsmith<1.0.0,>=0.3.45->langchain_core) (3.11.4)\n", "Requirement already satisfied: requests-toolbelt>=1.0.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langsmith<1.0.0,>=0.3.45->langchain_core) (1.0.0)\n", "Requirement already satisfied: requests>=2.0.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langsmith<1.0.0,>=0.3.45->langchain_core) (2.32.5)\n", "Requirement already satisfied: zstandard>=0.23.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langsmith<1.0.0,>=0.3.45->langchain_core) (0.25.0)\n", "Requirement already satisfied: anyio in 
/Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain_core) (4.11.0)\n", "Requirement already satisfied: certifi in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain_core) (2021.10.8)\n", "Requirement already satisfied: httpcore==1.* in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain_core) (1.0.9)\n", "Requirement already satisfied: idna in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain_core) (3.2)\n", "Requirement already satisfied: h11>=0.16 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain_core) (0.16.0)\n", "Requirement already satisfied: annotated-types>=0.6.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from pydantic<3.0.0,>=2.7.4->langchain_core) (0.7.0)\n", "Requirement already satisfied: pydantic-core==2.41.5 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from pydantic<3.0.0,>=2.7.4->langchain_core) (2.41.5)\n", "Requirement already satisfied: typing-inspection>=0.4.2 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from pydantic<3.0.0,>=2.7.4->langchain_core) (0.4.2)\n", "Requirement already satisfied: langchain<2.0.0,>=0.3.27 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langchain_community) (0.3.27)\n", "Requirement already satisfied: SQLAlchemy<3.0.0,>=1.4.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langchain_community) (1.4.22)\n", "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langchain_community) (3.13.2)\n", "Requirement already satisfied: dataclasses-json<0.7.0,>=0.6.7 in 
/Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langchain_community) (0.6.7)\n", "Requirement already satisfied: pydantic-settings<3.0.0,>=2.10.1 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langchain_community) (2.11.0)\n", "Requirement already satisfied: httpx-sse<1.0.0,>=0.4.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langchain_community) (0.4.3)\n", "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (2.6.1)\n", "Requirement already satisfied: aiosignal>=1.4.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (1.4.0)\n", "Requirement already satisfied: async-timeout<6.0,>=4.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (4.0.3)\n", "Requirement already satisfied: attrs>=17.3.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (21.2.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (1.8.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (6.7.0)\n", "Requirement already satisfied: propcache>=0.2.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (0.4.1)\n", "Requirement already satisfied: yarl<2.0,>=1.17.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (1.22.0)\n", "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from 
dataclasses-json<0.7.0,>=0.6.7->langchain_community) (3.26.1)\n", "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from dataclasses-json<0.7.0,>=0.6.7->langchain_community) (0.9.0)\n", "Requirement already satisfied: langchain-text-splitters<1.0.0,>=0.3.9 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langchain<2.0.0,>=0.3.27->langchain_community) (0.3.11)\n", "Requirement already satisfied: python-dotenv>=0.21.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from pydantic-settings<3.0.0,>=2.10.1->langchain_community) (1.2.1)\n", "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from requests>=2.0.0->langsmith<1.0.0,>=0.3.45->langchain_core) (2.0.4)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from requests>=2.0.0->langsmith<1.0.0,>=0.3.45->langchain_core) (1.26.7)\n", "Requirement already satisfied: greenlet!=0.4.17 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from SQLAlchemy<3.0.0,>=1.4.0->langchain_community) (1.1.1)\n", "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7.0,>=0.6.7->langchain_community) (0.4.3)\n", "Requirement already satisfied: ollama<1.0.0,>=0.5.3 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from langchain_ollama) (0.6.1)\n", "Requirement already satisfied: distro<2,>=1.7.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from groq) (1.9.0)\n", "Requirement already satisfied: sniffio in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from groq) (1.2.0)\n", "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from 
anyio->httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain_core) (1.3.1)\n", "Using cached faiss_cpu-1.12.0-cp39-cp39-macosx_13_0_x86_64.whl (8.0 MB)\n", "\u001b[33mWARNING: Error parsing dependencies of pyodbc: Invalid version: '4.0.0-unsupported'\u001b[0m\u001b[33m\n", "\u001b[0mInstalling collected packages: faiss-cpu\n", "\u001b[31mERROR: Exception:\n", "Traceback (most recent call last):\n", " File \"/Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages/pip/_internal/cli/base_command.py\", line 107, in _run_wrapper\n", " status = _inner_run()\n", " File \"/Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages/pip/_internal/cli/base_command.py\", line 98, in _inner_run\n", " return self.run(options, args)\n", " File \"/Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages/pip/_internal/cli/req_command.py\", line 85, in wrapper\n", " return func(self, options, args)\n", " File \"/Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages/pip/_internal/commands/install.py\", line 484, in run\n", " installed_versions[distribution.canonical_name] = distribution.version\n", " File \"/Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages/pip/_internal/metadata/pkg_resources.py\", line 189, in version\n", " return parse_version(self._dist.version)\n", " File \"/Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages/pip/_vendor/packaging/version.py\", line 56, in parse\n", " return Version(version)\n", " File \"/Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages/pip/_vendor/packaging/version.py\", line 202, in __init__\n", " raise InvalidVersion(f\"Invalid version: {version!r}\")\n", "pip._vendor.packaging.version.InvalidVersion: Invalid version: '4.0.0-unsupported'\u001b[0m\u001b[31m\n", "\u001b[0m" ] } ], "source": [ "!pip install langchain_core langchain_community langchain_ollama langchain_groq groq pymupdf faiss-cpu==1.12.0" ] }, { "cell_type": "code", "execution_count": 47, "id": "08010c68", "metadata": {}, 
"outputs": [], "source": [ "# env:\n", "import os\n", "from dotenv import load_dotenv\n", "load_dotenv()\n", "# Chat:\n", "from operator import itemgetter\n", "from langchain_core.documents import Document\n", "from langchain_core.messages import AIMessage, HumanMessage, SystemMessage\n", "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n", "from langchain_core.output_parsers import StrOutputParser\n", "# History\n", "from langchain_core.chat_history import BaseChatMessageHistory\n", "from langchain_community.chat_message_histories import ChatMessageHistory\n", "from langchain_core.messages import trim_messages\n", "from langchain_core.runnables import RunnableWithMessageHistory, RunnablePassthrough\n", "# Load\n", "from langchain_community.document_loaders import PyMuPDFLoader\n", "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", "from langchain.chains.combine_documents import create_stuff_documents_chain\n", "# Store\n", "from langchain_ollama import OllamaEmbeddings\n", "from langchain_community.vectorstores import FAISS\n", "# Retrieve\n", "from langchain.chains import create_retrieval_chain, create_history_aware_retriever" ] }, { "cell_type": "code", "execution_count": 13, "id": "88c8fa42", "metadata": {}, "outputs": [], "source": [ "from IPython.display import Markdown\n", "# from llm import get_response_stream, get_response" ] }, { "cell_type": "code", "execution_count": null, "id": "d9034491", "metadata": {}, "outputs": [], "source": [ "# for chunk in get_response(\"hello\", dummy=True):\n", "# print(chunk, end=\"\", flush=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "aa6e0efe", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "a9fd40ed", "metadata": {}, "source": [ "## LLM:" ] }, { "cell_type": "code", "execution_count": 14, "id": "e5d88e91", "metadata": {}, "outputs": [], "source": [ "MAX_TOKENS = 16000\n", "PER_DOC_TOKENS = 750\n", "SYS_PROMPT_SIZE = 
1000 # assumed\n", "TOTAL_DOC_SIZE = 3000\n", "DOC_COUNT = TOTAL_DOC_SIZE // PER_DOC_TOKENS" ] }, { "cell_type": "markdown", "id": "c28166b6", "metadata": {}, "source": [ "### Ollama - Gemma3:4b:" ] }, { "cell_type": "code", "execution_count": 15, "id": "ca5c0e2d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AIMessage(content=\"Hi there! How's your day going so far? 😊 \\n\\nIs there anything you'd like to chat about, or were you just saying hello?\", additional_kwargs={}, response_metadata={'model': 'gemma3:latest', 'created_at': '2025-11-25T18:59:17.333823Z', 'done': True, 'done_reason': 'stop', 'total_duration': 5791387667, 'load_duration': 3908150709, 'prompt_eval_count': 11, 'prompt_eval_duration': 931925916, 'eval_count': 34, 'eval_duration': 901088459, 'logprobs': None, 'model_name': 'gemma3:latest'}, id='run--bb256a1d-1986-4d2f-b866-b8beeb78e3ee-0', usage_metadata={'input_tokens': 11, 'output_tokens': 34, 'total_tokens': 45})" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from langchain_ollama import ChatOllama\n", "\n", "# Gemma3 context size -> 128K (1,31,072)\n", "# 30k -> 91% RAM, 91% GPU\n", "# 25k -> 82% RAM, 89% GPU\n", "# 15k -> 66% RAM, 87% GPU\n", "\n", "\n", "llm = ChatOllama(\n", " model=\"gemma3:latest\", temperature=1,\n", " # model=\"gemma3:1b\", temperature=1,\n", " # num_predict=MAX_OUTPUT_TOKENS,\n", " num_gpu=35, num_ctx=MAX_TOKENS\n", ")\n", "llm.invoke(\"Hii\")" ] }, { "cell_type": "markdown", "id": "a9e656b3", "metadata": {}, "source": [ "### Groq - Llama3:70B:" ] }, { "cell_type": "code", "execution_count": 21, "id": "4c5b89aa", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: groq in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (0.36.0)\n", "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from groq) (2.12.4)\n", 
"Requirement already satisfied: httpx<1,>=0.23.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from groq) (0.28.1)\n", "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from groq) (4.11.0)\n", "Requirement already satisfied: typing-extensions<5,>=4.10 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from groq) (4.15.0)\n", "Requirement already satisfied: distro<2,>=1.7.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from groq) (1.9.0)\n", "Requirement already satisfied: sniffio in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from groq) (1.2.0)\n", "Requirement already satisfied: idna>=2.8 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from anyio<5,>=3.5.0->groq) (3.2)\n", "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from anyio<5,>=3.5.0->groq) (1.3.1)\n", "Requirement already satisfied: httpcore==1.* in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from httpx<1,>=0.23.0->groq) (1.0.9)\n", "Requirement already satisfied: certifi in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from httpx<1,>=0.23.0->groq) (2021.10.8)\n", "Requirement already satisfied: h11>=0.16 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->groq) (0.16.0)\n", "Requirement already satisfied: typing-inspection>=0.4.2 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from pydantic<3,>=1.9.0->groq) (0.4.2)\n", "Requirement already satisfied: pydantic-core==2.41.5 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from pydantic<3,>=1.9.0->groq) (2.41.5)\n", "Requirement already satisfied: annotated-types>=0.6.0 in /Users/neetikasaxena/opt/anaconda3/lib/python3.9/site-packages (from pydantic<3,>=1.9.0->groq) (0.7.0)\n" ] } ], "source": [ "!pip 
install groq" ] }, { "cell_type": "code", "execution_count": 23, "id": "76c84435", "metadata": {}, "outputs": [], "source": [ "# from langchain_groq import ChatGroq\n", "# llm = ChatGroq(\n", "# model=\"llama-3.3-70b-versatile\", temperature=\"1\",\n", "# max_tokens=MAX_TOKENS, api_key=os.environ.get(\"GROQ_API_KEY\"),\n", "# )\n", "# llm.invoke('hi')" ] }, { "cell_type": "code", "execution_count": null, "id": "1e663687", "metadata": {}, "outputs": [], "source": [ "# Markdown(llm.invoke(\"write a story\").content)" ] }, { "cell_type": "code", "execution_count": null, "id": "ffa74252", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "aef563fc", "metadata": {}, "source": [ "## Template:\n", "\n", "
\n", " Limitations: Some LangChain components make the following base assumptions about key names.\n", "
\n", "\n", "- `CreateHistoryAwareRetriever` assumes the latest-user-message key to be `input`\n", "- `Trimmer` assumes the `ChatHistory` key to be `messages`\n", "- `CreateStuffDocumentChain` assumes the combined `docs` are passed in the key `context`\n", "- To overcome this, you need to use `RunnablePassthrough` or `RunnableMap` and assign those keys and variables accordingly.\n", "- But remember, you need to set this mapping manually for every variable whose name differs from the default.\n", "\n", "- So it's always good to follow the default keys and avoid complexity in chains." ] }, { "cell_type": "markdown", "id": "9078666d", "metadata": {}, "source": [ "### Chat:" ] }, { "cell_type": "code", "execution_count": 24, "id": "ca015456", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ChatPromptTemplate(input_variables=['chat_history', 'context', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMessageChunk')], typing.Annotated[langchain_core.messages.function.FunctionMessageChunk, Tag(tag='FunctionMessageChunk')], typing.Annotated[langchain_core.messages.tool.ToolMessageChunk, Tag(tag='ToolMessageChunk')]], 
FieldInfo(annotation=NoneType, required=True, discriminator=Discriminator(discriminator=, custom_error_type=None, custom_error_message=None, custom_error_context=None))]]}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template=\"You are a highly knowledgeable and helpful AI assistant.\\nYou are provided with the user's chat history and external documents to assist in your response.\\n\\nYour task is to:\\n- Accurately and clearly answer the user's latest question.\\n- Incorporate any relevant information from the context documents enclosed below.\\n- Use appropriate markdown formatting for clarity and readability (e.g., bullet points, headings, code blocks, tables).\\n\\n- If not available in the context, mention that and then answer from your own knowledge.\\nContextual Documents:\\n{context}\"), additional_kwargs={}), MessagesPlaceholder(variable_name='chat_history'), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input} \\n\\n **Strictly stick to the instructions!**'), additional_kwargs={})])" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "template_chat = ChatPromptTemplate.from_messages(\n", " messages=[\n", " (\"system\", \"\".join([\n", " \"You are a highly knowledgeable and helpful AI assistant.\\n\"\n", " \"You are provided with the user's chat history and external documents to assist in your response.\\n\\n\"\n", " \"Your task is to:\\n\"\n", " \"- Accurately and clearly answer the user's latest question.\\n\"\n", " \"- Incorporate any relevant information from the context documents enclosed below.\\n\"\n", " # \"- Reference the source(s) whenever applicable.\\n\"\n", " \"- Use appropriate markdown formatting for clarity and readability (e.g., bullet points, headings, code blocks, tables).\\n\\n\"\n", " \"- If not available in the 
context, mention that and then answer from your own knowledge.\\n\"\n", " \"Contextual Documents:\\n\"\n", " \"{context}\"\n", " ])),\n", " MessagesPlaceholder(variable_name=\"chat_history\"),\n", " (\"human\", \"{input} \\n\\n **Strictly stick to the instructions!**\")\n", " ]\n", ")\n", "template_chat" ] }, { "cell_type": "code", "execution_count": null, "id": "cfc0a9d9", "metadata": {}, "outputs": [], "source": [ "# Calculate tokens in this System message and pass rest of the max possible chat history:\n", "# trim_keep = model_context - template_tokens - 250 (safe side)\n", "# template_chat.messages[0].content" ] }, { "cell_type": "code", "execution_count": null, "id": "05028a3e", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "6659d3b3", "metadata": {}, "source": [ "### Summarize:" ] }, { "cell_type": "code", "execution_count": 55, "id": "0d263f1a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMessageChunk')], 
typing.Annotated[langchain_core.messages.function.FunctionMessageChunk, Tag(tag='FunctionMessageChunk')], typing.Annotated[langchain_core.messages.tool.ToolMessageChunk, Tag(tag='ToolMessageChunk')]], FieldInfo(annotation=NoneType, required=True, discriminator=Discriminator(discriminator=, custom_error_type=None, custom_error_message=None, custom_error_context=None))]]}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template=\"You are an expert at summarizing conversations into standalone prompts.\\nYou are given a complete chat history, ending with the user's latest message.\\n\\nYour task is to:\\n- Understand the entire conversation context.\\n- Identify references in the latest user message that relate to earlier messages.\\n- Create a single clear, concise, and standalone question or prompt.\\n- This final prompt should be fully understandable without needing the prior conversation.\\n- It will be used to retrieve the most relevant documents.\\n\\nOnly return the rewritten standalone prompt. Do not add explanations or formatting.\"), additional_kwargs={}), MessagesPlaceholder(variable_name='chat_history'), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}. 
\\n\\n **Make one standalone prompt as asked!**'), additional_kwargs={})])" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "template_summarize = ChatPromptTemplate.from_messages(\n", " messages=[\n", " (\"system\", \"\".join([\n", " \"You are an expert at summarizing conversations into standalone prompts.\\n\"\n", " \"You are given a complete chat history, ending with the user's latest message.\\n\\n\"\n", " \"Your task is to:\\n\"\n", " \"- Understand the entire conversation context.\\n\"\n", " \"- Identify references in the latest user message that relate to earlier messages.\\n\"\n", " \"- Create a single clear, concise, and standalone question or prompt.\\n\"\n", " \"- This final prompt should be fully understandable without needing the prior conversation.\\n\"\n", " \"- It will be used to retrieve the most relevant documents.\\n\\n\"\n", " \"Only return the rewritten standalone prompt. Do not add explanations or formatting.\"\n", " ])),\n", " MessagesPlaceholder(variable_name=\"chat_history\"),\n", " (\"human\", \"{input}. 
\\n\\n **Make one standalone prompt as asked!**\")\n", " ]\n", ")\n", "template_summarize" ] }, { "cell_type": "code", "execution_count": null, "id": "bcc331c6", "metadata": {}, "outputs": [], "source": [ "# Calculate tokens in this System message and pass rest of the max possible chat history:\n", "# trim_keep = model_context - template_tokens - (1000tok/doc * n-docs) - 250 (safe side)\n", "# template_summarize.messages" ] }, { "cell_type": "code", "execution_count": null, "id": "fb7fa2aa", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "e81f3ce2", "metadata": {}, "source": [ "## Chat Message History:" ] }, { "cell_type": "code", "execution_count": 26, "id": "c10bd691", "metadata": {}, "outputs": [], "source": [ "chat_histories = {}" ] }, { "cell_type": "code", "execution_count": 27, "id": "7cff95dc", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Created chat hist for session id: `abv`\n" ] }, { "data": { "text/plain": [ "InMemoryChatMessageHistory(messages=[])" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def get_session_history(session_id:str) -> BaseChatMessageHistory:\n", " # print(\"*\"*40, session_id, \"*\"*40)\n", " if session_id not in chat_histories:\n", " chat_histories[session_id] = ChatMessageHistory()\n", " # log here for creation of new chat history\n", " print(f\"Created chat hist for session id: `{session_id}`\") \n", " return chat_histories[session_id]\n", "\n", "get_session_history(\"abv\")" ] }, { "cell_type": "code", "execution_count": 28, "id": "b8348a8f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "InMemoryChatMessageHistory(messages=[])" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_session_history(\"abv\")" ] }, { "cell_type": "code", "execution_count": null, "id": "7d912cf1", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": 
"5e8716e2", "metadata": {}, "source": [ "### Trimmer:\n", "- Due to some un-known issue in variable name of messages being \"chat_history\" or \"messages\" the trimmer cant be used in this RAG implementation.\n", "- Reason: Trimmer expects \"messages\"\n", "- But, if i use \"messages\", then idk why, the summarizer step does not call LLM at all, it just does not work, and is completely untraceable.\n", "- Still, if u want to implement, use one runnable_passthrough before the trimmer in chain to convert chat_history > messages and the after its response, output > chat_history again! " ] }, { "cell_type": "code", "execution_count": 30, "id": "39350a60", "metadata": {}, "outputs": [], "source": [ "# # For summary 15k chat + 1k system and all\n", "trim_summary = trim_messages(\n", " max_tokens=MAX_TOKENS - SYS_PROMPT_SIZE,\n", " strategy=\"last\", token_counter=llm, start_on=\"human\",\n", " allow_partial=True, # include_system=True,\n", ")\n", "\n", "# # For chat 10k chat + 5*1k docs + 1k system and all\n", "trim_chat = trim_messages(\n", " max_tokens=MAX_TOKENS - (TOTAL_DOC_SIZE) - SYS_PROMPT_SIZE,\n", " strategy=\"last\", token_counter=llm, start_on=\"human\",\n", " allow_partial=True, # include_system=True,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "e7b73aef", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "34a387c0", "metadata": {}, "source": [ "## VectorStore:\n", "### Embeddings:\n", "- Notice that the embeddings are not offloaded at all to the GPU\n", "- This is done because the Ollama repeatedly keeps loading and un-loading the emb / llm in each call.\n", "- Even when I have memory, IDK why ollama loads only one of them?" 
] }, { "cell_type": "code", "execution_count": 31, "id": "4a1fa76c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "OllamaEmbeddings(model='mxbai-embed-large:latest', validate_model_on_init=False, base_url=None, client_kwargs={}, async_client_kwargs={}, sync_client_kwargs={}, mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=0, keep_alive=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None)" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "embeddings = OllamaEmbeddings(model=\"mxbai-embed-large:latest\", num_gpu=0)\n", "embeddings" ] }, { "cell_type": "markdown", "id": "686e69b0", "metadata": {}, "source": [ "### Loader:" ] }, { "cell_type": "code", "execution_count": 32, "id": "e643d40a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/server\n" ] } ], "source": [ "!pwd" ] }, { "cell_type": "code", "execution_count": 38, "id": "ec65bbd5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 0}, page_content=\"• Purpose\\n• This content guide will help you create a best-in-class product page for your items. You'll learn how to build \\nhigh-quality content and deliver a great member experience. 
Refer to this guide during the item setup process \\nwhile completing the Item Setup Template.\\n• This guide addresses the following content fields:\\n•\\nProduct Titles: Defines the specific order and structure your product title should follow in accordance with \\ncopy description guidelines.\\n•\\nProduct Details: Specifies all required and recommended site attributes that contribute to content quality.\\n•\\nImages: Provides examples of expected images based on product type.\\n• Use this guide before content creation and during item setup to answer any content-related needs.\\n• If you have questions about your product type, please reach out to your merchant or digital merchandising \\npartner.\\n• How to Use This Guide\"),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 1}, page_content=\"• PRODUCT TITLE / PRODUCT DESCRIPTION \\nBrand\\nEnter brand name as it appears on packaging. Match the brand's casing. \\nExample: M&M's. Include apostrophes. 
Use consistent branding and \\nnaming conventions.\\nProduct Title \\nThe Product Title details the brand, product type, and pack size.\\n50-60 characters is the optimal product title length, as longer titles will \\ntruncate (be shortened, with an ellipses) within the app experience.\\nBrand + Sub-Brand + Product Type + Size or Quantity\\n\\n\\n\\n\\n\\n\\n\\n\\n|Sub|-|Brand|\\n|---|---|---|\"),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 2}, page_content=\"• PRODUCT TITLE / PRODUCT DESCRIPTION \\nAvoid\\nTitles longer than sixty (60) \\ncharacters will be truncated with \\nan ellipsis (“…”) on the Product \\nLanding Page (PLP).\\nGuidelines \\n•\\nDo not list components included in variety pack\\n•\\nExample: M&M's, Twix, Skittles, Starburst and 3 Musketeers Assorted \\nEaster Egg Hunt Candy Variety Pack (200 pc., 4 lbs.)\\n•\\nList season only if applicable\\n•\\nHalloween, Christmas, Valentines Day, Easter, Fourth of July/ Summer \\n•\\nUse Product Type from table below -\\nCOMPONENTS IN BAG\\nPRODUCT TYPE \\nCount Sugar + Chocolate \\nCandy \\nCount Sugar \\nCandy / Sweets\\nChocolate\\nChocolate\\nBrand + Variety Pack, + Season + Product Type + Size or Quantity\\nMars Variety Pack, Easter Egg Hunt Candy (240 pcs.)\\nVARIETY PACK CANDY\\nNote: The below only pertains to big bag candy \\n\\n\\n\\n\\n\\n\\n\\n\\n|COMPONENTS IN BAG|PRODUCT TYPE|\\n|---|---|\"),\n", " Document(metadata={'producer': 
'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 3}, page_content='PRODUCT TITLE / PRODUCT DESCRIPTION \\nGuidelines\\nUnit of Measure is always - \\n•\\nIn lowercase\\n•\\nIn parentheses\\n•\\nSeparated by comma, no slash\\n•\\nExample : belVita Bites Breakfast Biscuits Variety Pack (1 oz., 36 \\npk.)\\nTotal weight of the sellable unit is not included in the Product Title \\nif the component count is listed. Example below.\\nApproved for Candy & Snacks\\npiece\\n(pc.) / (pcs.)\\npack(s) \\n(pk.) / (pks.)\\ncount \\n(ct.) \\nounces\\n(oz.)\\npound(s) \\n(lb.) / (lbs.)\\nUNIT OF MEASURE\\nIdentical component weights = (oz.,pk.) \\nVarying component weight = (pk.) \\nPASS\\nFAIL\\nVarying component weight = (pk.) \\nIdentical component weights = (oz.,pk.) \\n\\n\\n\\n\\n\\n\\n\\n\\n|Approved for Candy & Snacks|Col2|\\n|---|---|\\n|piece|(pc.) / (pcs.)|\\n|pack(s)|(pk.) / (pks.)|\\n|count|(ct.)|\\n|ounces|(oz.)|\\n|pound(s)|(lb.) 
/ (lbs.)|'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 4}, page_content='(pcs.)\\n(pcs., pk.) \\n(pk.)\\n(oz.) \\n(lbs.)\\n(oz. / pk.)\\n(ct.)\\nU N I T O F M E A S U R E E X A M P L E S \\n\\n\\n\\n\\n\\n\\n\\n\\n|P L E S|(ct.) (lbs.) (pcs.)
(pcs., pk.)|\\n|---|---|\\n|M||\\n|F M E A S U R E E X A|**(pk.)**
**(oz.)**|\\n|O||\\n|U N I T|**(oz. / pk.)**|'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 5}, page_content=\"• HIGHLIGHTS / KEY FEATURES \\nHighlights Outline \\nUse this outline for all PDPs. Follow the exact order. Template attached here.\\n1. Pack Size for Unit of Measure\\n•\\nHow it’s packaged; individually wrapped, resealable container, shareable \\n2. Product Description\\n•\\nFlavor profile and texture (creamy, crispy, crunchy, etc.) \\nVariety Packs\\n•\\nList all included flavors/brands\\n3. How to Enjoy / Who It’s For / Use Cases \\n•\\nSeasonal, Lunches, On the Go, Snacking, \\n4. Misc. 
Callouts\\n•\\nOrganic, Fair Trade, Kosher, Keto, (X)-Free, No artificial colors/ flavors, etc.\\n•\\nBusiness – vending machine, good for resale \\nHighlights include the product's \\nmost relevant information.\\nWhat Highlights must have:\\n•\\n3-5 bullet point statements \\n(maximum of 5)\\n•\\nLess than 65 characters per \\nstatement\\nWhat to avoid:\\n•\\nNO CAPS or periods\\n•\\nNo bolding or special \\nformatting\\n•\\nDo not repeat item name\\n•\\nNo exclamation points!\\n•\\nInputting bullet points in IDM\"),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 6}, page_content='• An item Description highlights the products experience, \\nunique benefits, and added value proposition to entice \\nshoppers.\\n• What we must have:\\n•\\nIn-depth product description\\n•\\nVariety packs require a description for each item included\\n•\\nUse the Product Features Matrix to view mandatory and \\nsuggested information.\\n• Suggestions\\n•\\nSeparate content into shorter paragraphs for easy \\nscanning. 
Introduce sub-headers where appropriate.\\n• DESCRIPTION / PRODUCT DETAILS / MARKETING \\nMESSAGE\\nWhat to avoid:\\n•\\nMisspellings, grammatical errors, incorrect HTML \\nformatting\\n•\\nDo not use exclamation points!\\n•\\nNo CAPS\\n•\\nCompany history that is irrelevant to the product\\n•\\nReferencing flavors, items, or brands not carried at \\nSam’s Club\\nDo not repeat information in the Highlights and/or \\nSpecifications.\\nThe Description calls out the product’s competitive advantage, item quantity, and quality details.\\nIt should include details that our members need to know to make a purchase decision.'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='P R O D U C T F E AT U R E M AT R I X\\nCandy\\nChips, Popcorn & Crackers\\nCookies & Snack Cakes\\nNuts & BFY Snacks\\nMANDATORY\\nFlavor\\nFlavor\\nFlavor\\nFlavor\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / Weight\\nNutrition\\nNutrition\\nNutrition\\nNutrition\\nIngredients\\nIngredients\\nIngredients\\nIngredients\\nBrands\\nBrands\\nBrands\\nBrands\\nComponents\\nComponents\\nComponents\\nComponents\\nShelf Life\\nShelf Life\\nShelf Life\\nShelf Life\\nSUGGESTE\\nD\\nCompany Efforts / Sustainability Initiatives / Seasonality \\nFood Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)\\nSEO SUGGESTIONS\\nCandy\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nSnack(s) / 
Snack Packs\\nCandy Bars\\nChips Varity / Variety Chips\\nIndividually Wrapped Snacks\\nIndividually Wrapped Snacks\\nBulk Candy\\nIndividually Wrapped Snacks\\nChocolate / Chocolate Candy\\nCandy Variety\\n\\n\\n\\n\\n\\n\\n\\n\\n|Col1|Candy|Chips, Popcorn & Crackers|Cookies & Snack Cakes|Nuts & BFY Snacks|\\n|---|---|---|---|---|\\n|**MANDATORY**|Flavor|Flavor|Flavor|Flavor|\\n|**MANDATORY**|Quantity / Weight|Quantity / Weight|Quantity / Weight|Quantity / Weight|\\n|**MANDATORY**|Nutrition|Nutrition|Nutrition|Nutrition|\\n|**MANDATORY**|Ingredients|Ingredients|Ingredients|Ingredients|\\n|**MANDATORY**|Brands|Brands|Brands|Brands|\\n|**MANDATORY**|Components|Components|Components|Components|\\n|**MANDATORY**|Shelf Life|Shelf Life|Shelf Life|Shelf Life|\\n|**SUGGESTE**
**D**|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|\\n|**SUGGESTE**
**D**|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|\\n|**SEO SUGGESTIONS**|Candy|Snack(s) / Snack Packs|Snack(s) / Snack Packs|Snack(s) / Snack Packs|\\n|**SEO SUGGESTIONS**|Candy Bars|Chips Varity / Variety Chips|Individually Wrapped Snacks|Individually Wrapped Snacks|\\n|**SEO SUGGESTIONS**|Bulk Candy|Individually Wrapped Snacks|||\\n|**SEO SUGGESTIONS**|Chocolate / Chocolate Candy||||\\n|**SEO SUGGESTIONS**|Candy Variety||||'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 8}, page_content='• SPECIFICATIONS / MARKETING MESSAGE \\nSpecifications consist of essential facts about the product displayed in a list of bullets or table \\nformat.\\nAll candy and snack items must include the following –\\n1.\\nNet Weight \\n2.\\nShelf Life\\n3.\\nPack Size for UOM\\nThis must be broken out in variety packs\\n4.\\nFull Ingredient List \\nThis must be provided in IDM by supplier\\n5.\\nAllergen + Warnings Information\\n*Variety packs must include breakdowns of ingredient and allergen information for \\neach component.\\nDo not repeat information provided in the Highlights or Description.'),\n", " Document(metadata={'producer': 'macOS Version 
14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 9}, page_content='• SPECIFICATIONS / MARKETING MESSAGE \\nSpecifications will be added in IDM under the Marketing Message \\nThe Marketing Message field in IDM will always have 3 entries minimum \\n \\n1. Long Description\\n \\n2. Net Weight, Shelf Life, Pack Size for UOM\\n \\n3. Full Ingredient/ Allergen information\\nSpecs will always be last in the sequence following the Long Description'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 10}, page_content='• Candy & Snacks requires four key images to provide \\nmembers with product information clarity. 
\\n• This includes the sellable unit, nutrition/ ingredients, \\npack size, and product details.\\nIMAGE REQUIREMENTS'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 11}, page_content='IMAGING OUTLINE \\n1\\nSellable Unit\\n1 Image Max\\nPack Size for Unit of Measure\\n1 Image\\nNutrition + Ingredients \\n1 Image Minimum \\nProduct\\n2 Images Max \\nNice to Have \\nTOTAL\\n4 Images Minimum\\n10 Images Max\\n2\\n3\\n4\\n5\\n!\\n3 Images Max\\nImages must follow the following order. See slides 18-25 for additional information.\\nIMAGES 1 - 4 ARE REQUIRED'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 12}, page_content='1. Sellable Unit\\nSellable Unit images are limited to 1 max.\\nOPTIONS BELOW:\\n(Image A) – Component(s) are not displayed on the sellable unit. 
Components are shown in the \\nforeground.\\n(Image B) – Component(s) are displayed on the sellable unit.\\nA\\nB'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 13}, page_content='14\\n2. Nutrition / Ingredients – Individual \\nThis image must be an individual graphic – not an image of the sellable unit.\\nVariety Packs will be limited to 1 image per sku included in the total sellable unit (example on slide \\n21).'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 14}, page_content='2. 
Nutrition / Ingredients – Variety \\nThis example image does not \\nclearly display the nutrition/ \\ningredient information for the \\nmember.'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 15}, page_content='3. Pack Size for Unit of Measure\\nImages must clearly display the following –\\n• How members will see the item after opening the box / package.\\n• The number of packs / flavors included in a variety pack.'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 16}, page_content='17\\n4. Product\\nProduct Images capture -\\n•\\nComponent highlights \\n•\\nFlavors, Textures, Usage, Storage, etc. 
\\n•\\nComponent packaging for Variety Packs \\n•\\nVariety packs will showcase each flavor on one image – \\nsingle product shots will be rejected'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 17}, page_content='5. Nice to Have – Dietary + Nutrition Callouts'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 18}, page_content='5. 
Nice to Have – Company Callouts / Product Usage / Lifestyle Graphics'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 19}, page_content='5. Nice to Have – Pairing Options / Recipes'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 20}, page_content=''),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': 
\"D:20240409154109Z00'00'\", 'page': 21}, page_content=''),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 22}, page_content=''),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 23}, page_content=''),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 24}, page_content=''),\n", " 
Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 25}, page_content='')]" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Paths are relative to the notebook's working directory (`server/`, see the `!pwd` cell above)\n", "# so the notebook runs from any checkout instead of one user's home directory.\n", "# file = PyMuPDFLoader(file_path=\"../assets/pdf_w_text.pdf\", extract_tables='markdown', extract_images=True).load()\n", "# file = PyMuPDFLoader(file_path=\"../test_data/form_1095_C.pdf\",\n", "#                      extract_tables='markdown', extract_images=True).load()\n", "\n", "file = PyMuPDFLoader(file_path=\"../test_data/candy_snacks.pdf\",\n", "                     extract_tables='markdown', extract_images=True).load()\n", "\n", "file" ] }, { "cell_type": "markdown", "id": "f927b8aa", "metadata": {}, "source": [ "### Splitter:" ] }, { "cell_type": "code", "execution_count": 39, "id": "3a5247c3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# NOTE(review): RecursiveCharacterTextSplitter measures chunk_size with `len` (characters) by default,\n", "# so PER_DOC_TOKENS acts as a character budget here, not a token budget - confirm this is intended.\n", "splitter = RecursiveCharacterTextSplitter(\n", "    chunk_size=PER_DOC_TOKENS, chunk_overlap=150,\n", ")\n", "splitter" ] }, { "cell_type": "markdown", "id": "afd1b33c", "metadata": {}, "source": [ "### Database:" ] }, { "cell_type": "code", "execution_count": 40, "id": "70c108c2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Document(metadata={'producer': 'macOS Version 14.4.1 (Build 
23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 0}, page_content=\"• Purpose\\n• This content guide will help you create a best-in-class product page for your items. You'll learn how to build \\nhigh-quality content and deliver a great member experience. Refer to this guide during the item setup process \\nwhile completing the Item Setup Template.\\n• This guide addresses the following content fields:\\n•\\nProduct Titles: Defines the specific order and structure your product title should follow in accordance with \\ncopy description guidelines.\\n•\\nProduct Details: Specifies all required and recommended site attributes that contribute to content quality.\\n•\\nImages: Provides examples of expected images based on product type.\\n• Use this guide before content creation and during item setup to answer any content-related needs.\"),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 0}, page_content='• Use this 
guide before content creation and during item setup to answer any content-related needs.\\n• If you have questions about your product type, please reach out to your merchant or digital merchandising \\npartner.\\n• How to Use This Guide'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 1}, page_content=\"• PRODUCT TITLE / PRODUCT DESCRIPTION \\nBrand\\nEnter brand name as it appears on packaging. Match the brand's casing. \\nExample: M&M's. Include apostrophes. 
Use consistent branding and \\nnaming conventions.\\nProduct Title \\nThe Product Title details the brand, product type, and pack size.\\n50-60 characters is the optimal product title length, as longer titles will \\ntruncate (be shortened, with an ellipses) within the app experience.\\nBrand + Sub-Brand + Product Type + Size or Quantity\\n\\n\\n\\n\\n\\n\\n\\n\\n|Sub|-|Brand|\\n|---|---|---|\"),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 2}, page_content=\"• PRODUCT TITLE / PRODUCT DESCRIPTION \\nAvoid\\nTitles longer than sixty (60) \\ncharacters will be truncated with \\nan ellipsis (“…”) on the Product \\nLanding Page (PLP).\\nGuidelines \\n•\\nDo not list components included in variety pack\\n•\\nExample: M&M's, Twix, Skittles, Starburst and 3 Musketeers Assorted \\nEaster Egg Hunt Candy Variety Pack (200 pc., 4 lbs.)\\n•\\nList season only if applicable\\n•\\nHalloween, Christmas, Valentines Day, Easter, Fourth of July/ Summer \\n•\\nUse Product Type from table below -\\nCOMPONENTS IN BAG\\nPRODUCT TYPE \\nCount Sugar + Chocolate \\nCandy \\nCount Sugar \\nCandy / Sweets\\nChocolate\\nChocolate\\nBrand + Variety Pack, + Season + Product Type + Size or Quantity\\nMars Variety Pack, Easter Egg Hunt Candy (240 pcs.)\\nVARIETY PACK CANDY\"),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 
'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 2}, page_content='Chocolate\\nBrand + Variety Pack, + Season + Product Type + Size or Quantity\\nMars Variety Pack, Easter Egg Hunt Candy (240 pcs.)\\nVARIETY PACK CANDY\\nNote: The below only pertains to big bag candy'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 2}, page_content='|COMPONENTS IN BAG|PRODUCT TYPE|\\n|---|---|'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 3}, 
page_content='PRODUCT TITLE / PRODUCT DESCRIPTION \\nGuidelines\\nUnit of Measure is always - \\n•\\nIn lowercase\\n•\\nIn parentheses\\n•\\nSeparated by comma, no slash\\n•\\nExample : belVita Bites Breakfast Biscuits Variety Pack (1 oz., 36 \\npk.)\\nTotal weight of the sellable unit is not included in the Product Title \\nif the component count is listed. Example below.\\nApproved for Candy & Snacks\\npiece\\n(pc.) / (pcs.)\\npack(s) \\n(pk.) / (pks.)\\ncount \\n(ct.) \\nounces\\n(oz.)\\npound(s) \\n(lb.) / (lbs.)\\nUNIT OF MEASURE\\nIdentical component weights = (oz.,pk.) \\nVarying component weight = (pk.) \\nPASS\\nFAIL\\nVarying component weight = (pk.) \\nIdentical component weights = (oz.,pk.)'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 3}, page_content='|Approved for Candy & Snacks|Col2|\\n|---|---|\\n|piece|(pc.) / (pcs.)|\\n|pack(s)|(pk.) / (pks.)|\\n|count|(ct.)|\\n|ounces|(oz.)|\\n|pound(s)|(lb.) 
/ (lbs.)|'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 4}, page_content='(pcs.)\\n(pcs., pk.) \\n(pk.)\\n(oz.) \\n(lbs.)\\n(oz. / pk.)\\n(ct.)\\nU N I T O F M E A S U R E E X A M P L E S \\n\\n\\n\\n\\n\\n\\n\\n\\n|P L E S|(ct.) (lbs.) (pcs.)
(pcs., pk.)|\\n|---|---|\\n|M||\\n|F M E A S U R E E X A|**(pk.)**
**(oz.)**|\\n|O||\\n|U N I T|**(oz. / pk.)**|'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 5}, page_content=\"• HIGHLIGHTS / KEY FEATURES \\nHighlights Outline \\nUse this outline for all PDPs. Follow the exact order. Template attached here.\\n1. Pack Size for Unit of Measure\\n•\\nHow it’s packaged; individually wrapped, resealable container, shareable \\n2. Product Description\\n•\\nFlavor profile and texture (creamy, crispy, crunchy, etc.) \\nVariety Packs\\n•\\nList all included flavors/brands\\n3. How to Enjoy / Who It’s For / Use Cases \\n•\\nSeasonal, Lunches, On the Go, Snacking, \\n4. Misc. 
Callouts\\n•\\nOrganic, Fair Trade, Kosher, Keto, (X)-Free, No artificial colors/ flavors, etc.\\n•\\nBusiness – vending machine, good for resale \\nHighlights include the product's \\nmost relevant information.\\nWhat Highlights must have:\\n•\\n3-5 bullet point statements \\n(maximum of 5)\\n•\"),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 5}, page_content=\"Highlights include the product's \\nmost relevant information.\\nWhat Highlights must have:\\n•\\n3-5 bullet point statements \\n(maximum of 5)\\n•\\nLess than 65 characters per \\nstatement\\nWhat to avoid:\\n•\\nNO CAPS or periods\\n•\\nNo bolding or special \\nformatting\\n•\\nDo not repeat item name\\n•\\nNo exclamation points!\\n•\\nInputting bullet points in IDM\"),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 6}, page_content='• An item Description highlights the products 
experience, \\nunique benefits, and added value proposition to entice \\nshoppers.\\n• What we must have:\\n•\\nIn-depth product description\\n•\\nVariety packs require a description for each item included\\n•\\nUse the Product Features Matrix to view mandatory and \\nsuggested information.\\n• Suggestions\\n•\\nSeparate content into shorter paragraphs for easy \\nscanning. Introduce sub-headers where appropriate.\\n• DESCRIPTION / PRODUCT DETAILS / MARKETING \\nMESSAGE\\nWhat to avoid:\\n•\\nMisspellings, grammatical errors, incorrect HTML \\nformatting\\n•\\nDo not use exclamation points!\\n•\\nNo CAPS\\n•\\nCompany history that is irrelevant to the product\\n•\\nReferencing flavors, items, or brands not carried at \\nSam’s Club'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 6}, page_content='•\\nNo CAPS\\n•\\nCompany history that is irrelevant to the product\\n•\\nReferencing flavors, items, or brands not carried at \\nSam’s Club\\nDo not repeat information in the Highlights and/or \\nSpecifications.\\nThe Description calls out the product’s competitive advantage, item quantity, and quality details.\\nIt should include details that our members need to know to make a purchase decision.'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': 
'/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='P R O D U C T F E AT U R E M AT R I X\\nCandy\\nChips, Popcorn & Crackers\\nCookies & Snack Cakes\\nNuts & BFY Snacks\\nMANDATORY\\nFlavor\\nFlavor\\nFlavor\\nFlavor\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / Weight\\nNutrition\\nNutrition\\nNutrition\\nNutrition\\nIngredients\\nIngredients\\nIngredients\\nIngredients\\nBrands\\nBrands\\nBrands\\nBrands\\nComponents\\nComponents\\nComponents\\nComponents\\nShelf Life\\nShelf Life\\nShelf Life\\nShelf Life\\nSUGGESTE\\nD\\nCompany Efforts / Sustainability Initiatives / Seasonality \\nFood Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)\\nSEO SUGGESTIONS\\nCandy\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nCandy Bars\\nChips Varity / Variety Chips\\nIndividually Wrapped Snacks'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='Candy\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nSnack(s) 
/ Snack Packs\\nCandy Bars\\nChips Varity / Variety Chips\\nIndividually Wrapped Snacks\\nIndividually Wrapped Snacks\\nBulk Candy\\nIndividually Wrapped Snacks\\nChocolate / Chocolate Candy\\nCandy Variety'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='|Col1|Candy|Chips, Popcorn & Crackers|Cookies & Snack Cakes|Nuts & BFY Snacks|\\n|---|---|---|---|---|\\n|**MANDATORY**|Flavor|Flavor|Flavor|Flavor|\\n|**MANDATORY**|Quantity / Weight|Quantity / Weight|Quantity / Weight|Quantity / Weight|\\n|**MANDATORY**|Nutrition|Nutrition|Nutrition|Nutrition|\\n|**MANDATORY**|Ingredients|Ingredients|Ingredients|Ingredients|\\n|**MANDATORY**|Brands|Brands|Brands|Brands|\\n|**MANDATORY**|Components|Components|Components|Components|\\n|**MANDATORY**|Shelf Life|Shelf Life|Shelf Life|Shelf Life|'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 
7}, page_content='|**MANDATORY**|Components|Components|Components|Components|\\n|**MANDATORY**|Shelf Life|Shelf Life|Shelf Life|Shelf Life|\\n|**SUGGESTE**
**D**|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|\\n|**SUGGESTE**
**D**|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='|**SEO SUGGESTIONS**|Candy|Snack(s) / Snack Packs|Snack(s) / Snack Packs|Snack(s) / Snack Packs|\\n|**SEO SUGGESTIONS**|Candy Bars|Chips Varity / Variety Chips|Individually Wrapped Snacks|Individually Wrapped Snacks|\\n|**SEO SUGGESTIONS**|Bulk Candy|Individually Wrapped Snacks|||\\n|**SEO SUGGESTIONS**|Chocolate / Chocolate Candy||||\\n|**SEO SUGGESTIONS**|Candy Variety||||'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 8}, page_content='• SPECIFICATIONS / 
MARKETING MESSAGE \\nSpecifications consist of essential facts about the product displayed in a list of bullets or table \\nformat.\\nAll candy and snack items must include the following –\\n1.\\nNet Weight \\n2.\\nShelf Life\\n3.\\nPack Size for UOM\\nThis must be broken out in variety packs\\n4.\\nFull Ingredient List \\nThis must be provided in IDM by supplier\\n5.\\nAllergen + Warnings Information\\n*Variety packs must include breakdowns of ingredient and allergen information for \\neach component.\\nDo not repeat information provided in the Highlights or Description.'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 9}, page_content='• SPECIFICATIONS / MARKETING MESSAGE \\nSpecifications will be added in IDM under the Marketing Message \\nThe Marketing Message field in IDM will always have 3 entries minimum \\n \\n1. Long Description\\n \\n2. Net Weight, Shelf Life, Pack Size for UOM\\n \\n3. 
Full Ingredient/ Allergen information\\nSpecs will always be last in the sequence following the Long Description'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 10}, page_content='• Candy & Snacks requires four key images to provide \\nmembers with product information clarity. \\n• This includes the sellable unit, nutrition/ ingredients, \\npack size, and product details.\\nIMAGE REQUIREMENTS'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 11}, page_content='IMAGING OUTLINE \\n1\\nSellable Unit\\n1 Image Max\\nPack Size for Unit of Measure\\n1 Image\\nNutrition + Ingredients \\n1 Image Minimum \\nProduct\\n2 Images Max \\nNice to Have \\nTOTAL\\n4 Images Minimum\\n10 Images Max\\n2\\n3\\n4\\n5\\n!\\n3 Images Max\\nImages must follow the following order. 
See slides 18-25 for additional information.\\nIMAGES 1 - 4 ARE REQUIRED'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 12}, page_content='1. Sellable Unit\\nSellable Unit images are limited to 1 max.\\nOPTIONS BELOW:\\n(Image A) – Component(s) are not displayed on the sellable unit. Components are shown in the \\nforeground.\\n(Image B) – Component(s) are displayed on the sellable unit.\\nA\\nB'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 13}, page_content='14\\n2. 
Nutrition / Ingredients – Individual \\nThis image must be an individual graphic – not an image of the sellable unit.\\nVariety Packs will be limited to 1 image per sku included in the total sellable unit (example on slide \\n21).'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 14}, page_content='2. Nutrition / Ingredients – Variety \\nThis example image does not \\nclearly display the nutrition/ \\ningredient information for the \\nmember.'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 15}, page_content='3. 
Pack Size for Unit of Measure\\nImages must clearly display the following –\\n• How members will see the item after opening the box / package.\\n• The number of packs / flavors included in a variety pack.'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 16}, page_content='17\\n4. Product\\nProduct Images capture -\\n•\\nComponent highlights \\n•\\nFlavors, Textures, Usage, Storage, etc. \\n•\\nComponent packaging for Variety Packs \\n•\\nVariety packs will showcase each flavor on one image – \\nsingle product shots will be rejected'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 17}, page_content='5. 
Nice to Have – Dietary + Nutrition Callouts'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 18}, page_content='5. Nice to Have – Company Callouts / Product Usage / Lifestyle Graphics'),\n", " Document(metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 19}, page_content='5. 
Nice to Have – Pairing Options / Recipes')]" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "splitted = splitter.split_documents(file)\n", "splitted" ] }, { "cell_type": "code", "execution_count": 48, "id": "c8f8f053", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The bare FAISS(...) constructor needs 4 params (embedding function, index, docstore, id mapping),\n", "# so build the store straight from the split documents with from_documents instead.\n", "database = FAISS.from_documents(documents=splitted, embedding=embeddings)\n", "database" ] }, { "cell_type": "code", "execution_count": 51, "id": "e5dba060", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\"• Purpose\\n• This content guide will help you create a best-in-class product page for your items. You'll learn how to build \\nhigh-quality content and deliver a great member experience. Refer to this guide during the item setup process \\nwhile completing the Item Setup Template.\\n• This guide addresses the following content fields:\\n•\\nProduct Titles: Defines the specific order and structure your product title should follow in accordance with \\ncopy description guidelines.\\n•\\nProduct Details: Specifies all required and recommended site attributes that contribute to content quality.\\n•\\nImages: Provides examples of expected images based on product type.\\n• Use this guide before content creation and during item setup to answer any content-related needs.\"\n", "106\n" ] } ], "source": [ "print(repr(splitted[0].page_content))\n", "print(len(splitted[0].page_content.split(\" \")))" ] }, { "cell_type": "markdown", "id": "9df58878", "metadata": {}, "source": [ "### Retriever:" ] }, { "cell_type": "markdown", "id": "3bcfabaf", "metadata": {}, "source": [ "- With 750-character chunks, each chunk holds approximately 95 words (150 at most).\n", "- To retrieve about 3k tokens of context, budgeting each chunk at 150 tokens, we need 3000 / 150 = 20 chunks.\n", "- So, set `k=20`." ] }, { "cell_type": 
"code", "execution_count": 52, "id": "e01408ed", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=, search_kwargs={'k': 20})" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retriever = database.as_retriever(\n", " search_type=\"similarity\",\n", " search_kwargs={'k': 20}\n", ")\n", "retriever" ] }, { "cell_type": "code", "execution_count": 53, "id": "2e90fb13", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Document(id='7d4c070a-b87b-4fae-b38a-267683d4ef84', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 3}, page_content='PRODUCT TITLE / PRODUCT DESCRIPTION \\nGuidelines\\nUnit of Measure is always - \\n•\\nIn lowercase\\n•\\nIn parentheses\\n•\\nSeparated by comma, no slash\\n•\\nExample : belVita Bites Breakfast Biscuits Variety Pack (1 oz., 36 \\npk.)\\nTotal weight of the sellable unit is not included in the Product Title \\nif the component count is listed. Example below.\\nApproved for Candy & Snacks\\npiece\\n(pc.) / (pcs.)\\npack(s) \\n(pk.) / (pks.)\\ncount \\n(ct.) \\nounces\\n(oz.)\\npound(s) \\n(lb.) / (lbs.)\\nUNIT OF MEASURE\\nIdentical component weights = (oz.,pk.) \\nVarying component weight = (pk.) \\nPASS\\nFAIL\\nVarying component weight = (pk.) 
\\nIdentical component weights = (oz.,pk.)'),\n", " Document(id='e5e3c384-622b-4a97-a396-82e0ad7d6987', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 15}, page_content='3. Pack Size for Unit of Measure\\nImages must clearly display the following –\\n• How members will see the item after opening the box / package.\\n• The number of packs / flavors included in a variety pack.'),\n", " Document(id='be579e7c-42ce-49e3-9c13-d551abb17492', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 4}, page_content='(pcs.)\\n(pcs., pk.) \\n(pk.)\\n(oz.) \\n(lbs.)\\n(oz. / pk.)\\n(ct.)\\nU N I T O F M E A S U R E E X A M P L E S \\n\\n\\n\\n\\n\\n\\n\\n\\n|P L E S|(ct.) (lbs.) (pcs.)
(pcs., pk.)|\\n|---|---|\\n|M||\\n|F M E A S U R E E X A|**(pk.)**
**(oz.)**|\\n|O||\\n|U N I T|**(oz. / pk.)**|'),\n", " Document(id='ec9cc761-9717-42e0-ab3b-b3f576e21d9b', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 11}, page_content='IMAGING OUTLINE \\n1\\nSellable Unit\\n1 Image Max\\nPack Size for Unit of Measure\\n1 Image\\nNutrition + Ingredients \\n1 Image Minimum \\nProduct\\n2 Images Max \\nNice to Have \\nTOTAL\\n4 Images Minimum\\n10 Images Max\\n2\\n3\\n4\\n5\\n!\\n3 Images Max\\nImages must follow the following order. See slides 18-25 for additional information.\\nIMAGES 1 - 4 ARE REQUIRED'),\n", " Document(id='6da57c68-741c-4b60-953c-a9b62ef1c532', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 1}, page_content=\"• PRODUCT TITLE / PRODUCT DESCRIPTION \\nBrand\\nEnter brand name as it appears on packaging. Match the brand's casing. \\nExample: M&M's. Include apostrophes. 
Use consistent branding and \\nnaming conventions.\\nProduct Title \\nThe Product Title details the brand, product type, and pack size.\\n50-60 characters is the optimal product title length, as longer titles will \\ntruncate (be shortened, with an ellipses) within the app experience.\\nBrand + Sub-Brand + Product Type + Size or Quantity\\n\\n\\n\\n\\n\\n\\n\\n\\n|Sub|-|Brand|\\n|---|---|---|\"),\n", " Document(id='c665048c-4150-42a2-924f-ca61128d90ac', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 3}, page_content='|Approved for Candy & Snacks|Col2|\\n|---|---|\\n|piece|(pc.) / (pcs.)|\\n|pack(s)|(pk.) / (pks.)|\\n|count|(ct.)|\\n|ounces|(oz.)|\\n|pound(s)|(lb.) 
/ (lbs.)|'),\n", " Document(id='155759c1-83c0-49a7-afa3-f75c7a939e13', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 2}, page_content='|COMPONENTS IN BAG|PRODUCT TYPE|\\n|---|---|'),\n", " Document(id='1db24527-1303-44cc-980b-d8254b91e1ac', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 8}, page_content='• SPECIFICATIONS / MARKETING MESSAGE \\nSpecifications consist of essential facts about the product displayed in a list of bullets or table \\nformat.\\nAll candy and snack items must include the following –\\n1.\\nNet Weight \\n2.\\nShelf Life\\n3.\\nPack Size for UOM\\nThis must be broken out in variety packs\\n4.\\nFull Ingredient List \\nThis must be provided in IDM by supplier\\n5.\\nAllergen + Warnings Information\\n*Variety packs must include breakdowns of ingredient and allergen information for \\neach component.\\nDo not repeat information provided in the Highlights 
or Description.'),\n", " Document(id='77697458-a293-493e-a92e-23d661134eb0', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 12}, page_content='1. Sellable Unit\\nSellable Unit images are limited to 1 max.\\nOPTIONS BELOW:\\n(Image A) – Component(s) are not displayed on the sellable unit. Components are shown in the \\nforeground.\\n(Image B) – Component(s) are displayed on the sellable unit.\\nA\\nB'),\n", " Document(id='03b3389a-8f98-4ec9-a19b-d05a4c3b8c62', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='P R O D U C T F E AT U R E M AT R I X\\nCandy\\nChips, Popcorn & Crackers\\nCookies & Snack Cakes\\nNuts & BFY Snacks\\nMANDATORY\\nFlavor\\nFlavor\\nFlavor\\nFlavor\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / 
Weight\\nNutrition\\nNutrition\\nNutrition\\nNutrition\\nIngredients\\nIngredients\\nIngredients\\nIngredients\\nBrands\\nBrands\\nBrands\\nBrands\\nComponents\\nComponents\\nComponents\\nComponents\\nShelf Life\\nShelf Life\\nShelf Life\\nShelf Life\\nSUGGESTE\\nD\\nCompany Efforts / Sustainability Initiatives / Seasonality \\nFood Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)\\nSEO SUGGESTIONS\\nCandy\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nCandy Bars\\nChips Varity / Variety Chips\\nIndividually Wrapped Snacks'),\n", " Document(id='ef61e89c-3a23-43e9-8831-113c87089785', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 13}, page_content='14\\n2. 
Nutrition / Ingredients – Individual \\nThis image must be an individual graphic – not an image of the sellable unit.\\nVariety Packs will be limited to 1 image per sku included in the total sellable unit (example on slide \\n21).'),\n", " Document(id='e7c8d0c7-f2ff-486a-ae6e-ca4092678120', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 9}, page_content='• SPECIFICATIONS / MARKETING MESSAGE \\nSpecifications will be added in IDM under the Marketing Message \\nThe Marketing Message field in IDM will always have 3 entries minimum \\n \\n1. Long Description\\n \\n2. Net Weight, Shelf Life, Pack Size for UOM\\n \\n3. 
Full Ingredient/ Allergen information\\nSpecs will always be last in the sequence following the Long Description'),\n", " Document(id='8454018a-c3c2-4203-8582-256435521696', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='Candy\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nCandy Bars\\nChips Varity / Variety Chips\\nIndividually Wrapped Snacks\\nIndividually Wrapped Snacks\\nBulk Candy\\nIndividually Wrapped Snacks\\nChocolate / Chocolate Candy\\nCandy Variety'),\n", " Document(id='3593c89f-83d6-4f17-9913-61208f44bad4', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='|**MANDATORY**|Components|Components|Components|Components|\\n|**MANDATORY**|Shelf Life|Shelf Life|Shelf Life|Shelf Life|\\n|**SUGGESTE**
**D**|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|\\n|**SUGGESTE**
**D**|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|'),\n", " Document(id='a4206354-0f14-42af-862b-65785c4be800', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 10}, page_content='• Candy & Snacks requires four key images to provide \\nmembers with product information clarity. \\n• This includes the sellable unit, nutrition/ ingredients, \\npack size, and product details.\\nIMAGE REQUIREMENTS'),\n", " Document(id='c9435a94-f045-4318-ae25-98f592e45b4c', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 14}, page_content='2. 
Nutrition / Ingredients – Variety \\nThis example image does not \\nclearly display the nutrition/ \\ningredient information for the \\nmember.'),\n", " Document(id='520dc0e6-0b0c-4e0c-b2bd-033c32c2a39e', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 17}, page_content='5. Nice to Have – Dietary + Nutrition Callouts'),\n", " Document(id='a00ee7ca-4ebc-4c1e-a33a-17c03e287d39', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 5}, page_content=\"• HIGHLIGHTS / KEY FEATURES \\nHighlights Outline \\nUse this outline for all PDPs. Follow the exact order. Template attached here.\\n1. Pack Size for Unit of Measure\\n•\\nHow it’s packaged; individually wrapped, resealable container, shareable \\n2. Product Description\\n•\\nFlavor profile and texture (creamy, crispy, crunchy, etc.) \\nVariety Packs\\n•\\nList all included flavors/brands\\n3. 
How to Enjoy / Who It’s For / Use Cases \\n•\\nSeasonal, Lunches, On the Go, Snacking, \\n4. Misc. Callouts\\n•\\nOrganic, Fair Trade, Kosher, Keto, (X)-Free, No artificial colors/ flavors, etc.\\n•\\nBusiness – vending machine, good for resale \\nHighlights include the product's \\nmost relevant information.\\nWhat Highlights must have:\\n•\\n3-5 bullet point statements \\n(maximum of 5)\\n•\"),\n", " Document(id='760124eb-be66-4b0d-8e8a-886d931e3c18', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 6}, page_content='• An item Description highlights the products experience, \\nunique benefits, and added value proposition to entice \\nshoppers.\\n• What we must have:\\n•\\nIn-depth product description\\n•\\nVariety packs require a description for each item included\\n•\\nUse the Product Features Matrix to view mandatory and \\nsuggested information.\\n• Suggestions\\n•\\nSeparate content into shorter paragraphs for easy \\nscanning. 
Introduce sub-headers where appropriate.\\n• DESCRIPTION / PRODUCT DETAILS / MARKETING \\nMESSAGE\\nWhat to avoid:\\n•\\nMisspellings, grammatical errors, incorrect HTML \\nformatting\\n•\\nDo not use exclamation points!\\n•\\nNo CAPS\\n•\\nCompany history that is irrelevant to the product\\n•\\nReferencing flavors, items, or brands not carried at \\nSam’s Club'),\n", " Document(id='2a09c111-c967-4426-87d3-585e68be874a', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 0}, page_content='• Use this guide before content creation and during item setup to answer any content-related needs.\\n• If you have questions about your product type, please reach out to your merchant or digital merchandising \\npartner.\\n• How to Use This Guide')]" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retriever.invoke(\"what is the unit of measure?\")" ] }, { "cell_type": "markdown", "id": "9a1aca4c", "metadata": {}, "source": [ "## Summarizer:\n", "\n", "- Old method.\n", "- This is too hard-coded; switch to the retrieval method with `create_stuff_documents_chain` to ingest the documents and get the answer in a single chain call." 
] }, { "cell_type": "code", "execution_count": null, "id": "7d4b9324", "metadata": {}, "outputs": [], "source": [ "# chain = (\n", "# RunnablePassthrough().assign(messages=itemgetter(\"messages\") | trim_chat)\n", "# | template_summarize | llm | StrOutputParser())\n", "\n", "# summarizer_llm = RunnableWithMessageHistory(\n", "# runnable=chain,\n", "# get_session_history=get_session_history,\n", "# input_messages_key=\"input\",\n", "# history_messages_key=\"messages\",\n", "# )" ] }, { "cell_type": "code", "execution_count": null, "id": "ce0e8b73", "metadata": {}, "outputs": [], "source": [ "# chat_histories[10] = ChatMessageHistory()\n", "# chat_histories[10].messages = [\n", "# HumanMessage(\"Hello, I'm Bhushan, What is your name?\"),\n", "# AIMessage(\"I am an AI assistant. I am not a human like you.\"),\n", "# HumanMessage(\"What is Artificial General Intelligence?\"),\n", "# AIMessage(\"Artificial General Intelligence (AGI) refers to highly autonomous systems that outperform humans at most economically valuable work.\"),\n", "# ]\n", "# # )" ] }, { "cell_type": "code", "execution_count": null, "id": "afca766a", "metadata": {}, "outputs": [], "source": [ "# summarizer_llm.invoke(\n", "# input={\"input\": \"So it's not achieved yet?\", },\n", "# config={\"configurable\": {\"session_id\": 10}}\n", "# )" ] }, { "cell_type": "code", "execution_count": null, "id": "364a3497", "metadata": {}, "outputs": [], "source": [ "# chat_histories[10].messages" ] }, { "cell_type": "code", "execution_count": null, "id": "5fba9fca", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "3483077d", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "e3fb841d", "metadata": {}, "source": [ "## Runnable With History:\n", "- Commented out as it's unnecessary and not used in the code.\n", "- But keep it, as it can be used in the future." 
] }, { "cell_type": "code", "execution_count": null, "id": "ad4a458c", "metadata": {}, "outputs": [], "source": [ "# chain = (\n", "# RunnablePassthrough(name=\"Trim Chat History\").assign(messages=itemgetter(\"messages\") | trim_chat)\n", "# | template_chat | llm | StrOutputParser())\n", "\n", "# chat_llm = RunnableWithMessageHistory(\n", "# runnable=chain,\n", "# get_session_history=get_session_history,\n", "# input_messages_key=\"input\",\n", "# history_messages_key=\"messages\",\n", "# )" ] }, { "cell_type": "code", "execution_count": null, "id": "5ff80224", "metadata": {}, "outputs": [], "source": [ "# chat_llm.invoke(\n", "# input={\n", "# \"input\": \"Hello, I'm Bhushan, What is your name?\",\n", "# \"context\": \"This is some random document which contains some random information.\"\n", "# },\n", "# config={\n", "# \"configurable\": {\n", "# \"session_id\": 15\n", "# }\n", "# }\n", "# )" ] }, { "cell_type": "code", "execution_count": null, "id": "87d49b24", "metadata": {}, "outputs": [], "source": [ "# chat_llm.invoke(\n", "# input={\n", "# \"input\": \"What did we discuss?\",\n", "# \"context\": \"There is no context available for this question.\"\n", "# },\n", "# config={\n", "# \"configurable\": {\n", "# \"session_id\": 15\n", "# }\n", "# }\n", "# )" ] }, { "cell_type": "code", "execution_count": null, "id": "572626cf", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "a284c06c", "metadata": {}, "source": [ "- TODO (if feasible): add an option to paste a link and scrape the whole content from there." 
] }, { "cell_type": "markdown", "id": "04c1d7ad", "metadata": {}, "source": [ "## Chain:" ] }, { "cell_type": "code", "execution_count": null, "id": "d84c643b", "metadata": {}, "outputs": [], "source": [ "# Make a passthrough which prints variables and passes them to next step\n", "# def print_and_pass(input):\n", "# print(input)\n", "# return input" ] }, { "cell_type": "code", "execution_count": 56, "id": "1dba081b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "RunnableWithMessageHistory(bound=RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={\n", " chat_history: RunnableBinding(bound=RunnableLambda(_enter_history), kwargs={}, config={'run_name': 'load_history'}, config_factories=[])\n", "}), kwargs={}, config={'run_name': 'insert_history'}, config_factories=[])\n", "| RunnableBinding(bound=RunnableLambda(_call_runnable_sync), kwargs={}, config={'run_name': 'check_sync_or_async'}, config_factories=[]), kwargs={}, config={'run_name': 'RunnableWithMessageHistory'}, config_factories=[]), kwargs={}, config={}, config_factories=[], get_session_history=, input_messages_key='input', output_messages_key='answer', history_messages_key='chat_history', history_factory_config=[ConfigurableFieldSpec(id='session_id', annotation=, name='Session ID', description='Unique identifier for a session.', default='', is_shared=True, dependencies=None)])" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 3 User Input + Chat History > Summarizer Template > Standalone Que > Get Docs\n", "summarize_chain = create_history_aware_retriever(llm, retriever, template_summarize)\n", "# summarize_chain = trim_summary | create_history_aware_retriever(llm, retriever, template_summarize)\n", "\n", "# 4 Multiple Docs > Combine All > Chat Template > Final Output\n", "qa_chain = create_stuff_documents_chain(llm=llm, prompt=template_chat)\n", "\n", "# 2 Input + Chat History > [ `Summarizer Template` > `Get Docs` ] > [ `Combine` > 
`Chat Template` ] > Output\n", "rag_chain = create_retrieval_chain(summarize_chain , qa_chain)\n", "\n", "# 1 Final main chain:\n", "conversational_rag_chain = RunnableWithMessageHistory(\n", " runnable=rag_chain,\n", " get_session_history=get_session_history,\n", " input_messages_key=\"input\",\n", " history_messages_key=\"chat_history\",\n", " output_messages_key=\"answer\",\n", ")\n", "conversational_rag_chain" ] }, { "cell_type": "markdown", "id": "2faa3a7d", "metadata": {}, "source": [ "## Test:" ] }, { "cell_type": "markdown", "id": "19e77ef7", "metadata": {}, "source": [ "### Database:" ] }, { "cell_type": "code", "execution_count": 57, "id": "c84f7631", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['0d0bb983-0436-4bc3-b1a9-90563eaf1760',\n", " '63ea4109-e11b-48eb-aac5-cf234a9c3d0e',\n", " '64ab81dd-caa4-4fc6-a802-f8caeb372863',\n", " '08214f30-26d1-4e5d-9a7a-73ab10d6b64e',\n", " '52b436ad-2894-4976-bddf-71e18d426228',\n", " '6764ac04-e32c-4353-b273-c896c1767b0c',\n", " 'bcf19987-4aa1-44f7-a456-0d600f0b252d']" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "database.add_documents(\n", " [\n", " Document(\"Cats and Dogs are both popular pets.\"),\n", " Document(\"Cats are independent and low-maintenance pets.\"),\n", " Document(\"Dogs are loyal and require more attention.\"),\n", " Document(\"Cats are often seen as aloof and mysterious.\"),\n", " Document(\"Dogs are known for their loyalty and companionship.\"),\n", " Document(\"Cats are great for small living spaces.\"),\n", " Document(\"Cats are NOT AT ALL LOYAL.\"),\n", " ],\n", " embedding=embeddings\n", ")" ] }, { "cell_type": "code", "execution_count": 59, "id": "f8c1713d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Document(id='52b436ad-2894-4976-bddf-71e18d426228', metadata={}, page_content='Dogs are known for their loyalty and companionship.'),\n", " Document(id='64ab81dd-caa4-4fc6-a802-f8caeb372863', metadata={}, 
page_content='Dogs are loyal and require more attention.'),\n", " Document(id='0d0bb983-0436-4bc3-b1a9-90563eaf1760', metadata={}, page_content='Cats and Dogs are both popular pets.'),\n", " Document(id='08214f30-26d1-4e5d-9a7a-73ab10d6b64e', metadata={}, page_content='Cats are often seen as aloof and mysterious.')]" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# database.search(search_type='similarity', query=\"what is the unit of measure?\", k=8)\n", "database.search(search_type='similarity', query=\"what are dogs known for ?\", k=4)" ] }, { "cell_type": "markdown", "id": "05049e4e", "metadata": {}, "source": [ "### Summarize Chain:" ] }, { "cell_type": "code", "execution_count": 121, "id": "ffbce540", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Document(id='0d0bb983-0436-4bc3-b1a9-90563eaf1760', metadata={}, page_content='Cats and Dogs are both popular pets.'),\n", " Document(id='52b436ad-2894-4976-bddf-71e18d426228', metadata={}, page_content='Dogs are known for their loyalty and companionship.'),\n", " Document(id='64ab81dd-caa4-4fc6-a802-f8caeb372863', metadata={}, page_content='Dogs are loyal and require more attention.'),\n", " Document(id='75f46a43-e548-4f7a-a897-08578f626d0e', metadata={'session_id': 'user_3'}, page_content=\"Earth's atmosphere is composed primarily of nitrogen (about 78%) and oxygen (about 21%).\"),\n", " Document(id='71667b85-67c7-4157-abad-62300dc83e2c', metadata={'session_id': 'user_3'}, page_content=\"Earth's atmosphere is composed primarily of nitrogen (about 78%) and oxygen (about 21%).\"),\n", " Document(id='64a8822c-bb45-4da0-8ae3-cff5f251d1de', metadata={'session_id': 'public'}, page_content='Earth, Sun and Moon all three are part of the Solar System.'),\n", " Document(id='95c2747e-37a8-4161-afe3-4bcfde7b7bb7', metadata={'session_id': 'public'}, page_content='Earth, Sun and Moon all three are part of the Solar System.'),\n", " 
Document(id='0d7fcf01-3266-4907-9d61-e9b774420376', metadata={'session_id': 'public'}, page_content='Earth, Sun and Moon all three are part of the Solar System.'),\n", " Document(id='977e64a1-b5e0-4ad1-bc2d-00626743436c', metadata={'session_id': 'user_3'}, page_content='Earth is the third planet from the Sun and the only known planet to support life.'),\n", " Document(id='15b5166e-7dda-404c-8756-817652b7380d', metadata={'session_id': 'user_3'}, page_content='Earth is the third planet from the Sun and the only known planet to support life.'),\n", " Document(id='53e6ec11-186e-4a24-86eb-d55d1f25468b', metadata={'session_id': 'user_3'}, page_content=\"Earth's surface is covered by about 71% water.\"),\n", " Document(id='047c0ece-de86-427b-841d-d8ef1505efc8', metadata={'session_id': 'user_3'}, page_content=\"Earth's surface is covered by about 71% water.\"),\n", " Document(id='3593c89f-83d6-4f17-9913-61208f44bad4', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='|**MANDATORY**|Components|Components|Components|Components|\\n|**MANDATORY**|Shelf Life|Shelf Life|Shelf Life|Shelf Life|\\n|**SUGGESTE**
**D**|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|\\n|**SUGGESTE**
**D**|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|'),\n", " Document(id='4122e3a9-bb52-47dc-9498-1bcc36c1ef6b', metadata={'session_id': 'user_3'}, page_content='The Earth has one natural satellite, the Moon.'),\n", " Document(id='d7f937d7-b4bd-4e31-b065-9fb0d55e53e3', metadata={'session_id': 'user_3'}, page_content='The Earth has one natural satellite, the Moon.'),\n", " Document(id='be579e7c-42ce-49e3-9c13-d551abb17492', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 4}, page_content='(pcs.)\\n(pcs., pk.) \\n(pk.)\\n(oz.) \\n(lbs.)\\n(oz. / pk.)\\n(ct.)\\nU N I T O F M E A S U R E E X A M P L E S \\n\\n\\n\\n\\n\\n\\n\\n\\n|P L E S|(ct.) (lbs.) (pcs.)
(pcs., pk.)|\\n|---|---|\\n|M||\\n|F M E A S U R E E X A|**(pk.)**
**(oz.)**|\\n|O||\\n|U N I T|**(oz. / pk.)**|'),\n", " Document(id='03b3389a-8f98-4ec9-a19b-d05a4c3b8c62', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='P R O D U C T F E AT U R E M AT R I X\\nCandy\\nChips, Popcorn & Crackers\\nCookies & Snack Cakes\\nNuts & BFY Snacks\\nMANDATORY\\nFlavor\\nFlavor\\nFlavor\\nFlavor\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / Weight\\nNutrition\\nNutrition\\nNutrition\\nNutrition\\nIngredients\\nIngredients\\nIngredients\\nIngredients\\nBrands\\nBrands\\nBrands\\nBrands\\nComponents\\nComponents\\nComponents\\nComponents\\nShelf Life\\nShelf Life\\nShelf Life\\nShelf Life\\nSUGGESTE\\nD\\nCompany Efforts / Sustainability Initiatives / Seasonality \\nFood Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)\\nSEO SUGGESTIONS\\nCandy\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nCandy Bars\\nChips Varity / Variety Chips\\nIndividually Wrapped Snacks'),\n", " Document(id='c9435a94-f045-4318-ae25-98f592e45b4c', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 
'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 14}, page_content='2. Nutrition / Ingredients – Variety \\nThis example image does not \\nclearly display the nutrition/ \\ningredient information for the \\nmember.'),\n", " Document(id='44480fa4-596e-4978-9dc2-cd889aa0c3e9', metadata={'session_id': 'user_2'}, page_content=\"The Moon is Earth's only natural satellite and the fifth largest moon in the Solar System.\"),\n", " Document(id='3f0e6f33-fc5d-470d-9f00-2c7c4381aaa5', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='|**SEO SUGGESTIONS**|Candy|Snack(s) / Snack Packs|Snack(s) / Snack Packs|Snack(s) / Snack Packs|\\n|**SEO SUGGESTIONS**|Candy Bars|Chips Varity / Variety Chips|Individually Wrapped Snacks|Individually Wrapped Snacks|\\n|**SEO SUGGESTIONS**|Bulk Candy|Individually Wrapped Snacks|||\\n|**SEO SUGGESTIONS**|Chocolate / Chocolate Candy||||\\n|**SEO SUGGESTIONS**|Candy Variety||||')]" ] }, "execution_count": 121, "metadata": {}, "output_type": "execute_result" } ], "source": [ "summarize_chain.invoke(\n", " input={\n", " \"input\": \"What animal was i talking about? 
Which one is most common apart from that animal?\",\n", " \"chat_history\": [\n", " HumanMessage(\"Hello, I'm Bhushan, What is your name?\"),\n", " AIMessage(\"I am an AI assistant. I am not a human like you.\"),\n", " HumanMessage(\"What are ur thoughts on Dogs?\"),\n", " AIMessage(\"Dogs are loyal and require more attention.\"), \n", " ]\n", " },\n", ")" ] }, { "cell_type": "markdown", "id": "34e264d7", "metadata": {}, "source": [ "### QA - Chain:" ] }, { "cell_type": "code", "execution_count": 61, "id": "9bbf0dbd", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Retrieval-Augmented Generation.'" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "qa_chain.invoke(\n", " input={\n", " \"input\": \"Full form of RAG?\",\n", " \"context\": [Document(page_content=\"This is some random document which contains some random information.\")],\n", " \"chat_history\": [\n", " HumanMessage(\"hi\"),\n", " AIMessage(\"hello\"),\n", " HumanMessage(\"What is RAG?\"),\n", " AIMessage(\"RAG is a technique to combine retrieval and generation.\"),\n", " ]\n", " },\n", " # config={\"configurable\": {\"session_id\": 15}}\n", ")" ] }, { "cell_type": "markdown", "id": "8dac4910", "metadata": {}, "source": [ "### RAG Chain:" ] }, { "cell_type": "code", "execution_count": 62, "id": "4030623c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'input': 'Full form of RAG?',\n", " 'context': [Document(id='fe52e86f-bb26-457d-a75d-c4b8d23d1d28', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 
'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 5}, page_content=\"Highlights include the product's \\nmost relevant information.\\nWhat Highlights must have:\\n•\\n3-5 bullet point statements \\n(maximum of 5)\\n•\\nLess than 65 characters per \\nstatement\\nWhat to avoid:\\n•\\nNO CAPS or periods\\n•\\nNo bolding or special \\nformatting\\n•\\nDo not repeat item name\\n•\\nNo exclamation points!\\n•\\nInputting bullet points in IDM\"),\n", " Document(id='6da57c68-741c-4b60-953c-a9b62ef1c532', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 1}, page_content=\"• PRODUCT TITLE / PRODUCT DESCRIPTION \\nBrand\\nEnter brand name as it appears on packaging. Match the brand's casing. \\nExample: M&M's. Include apostrophes. 
Use consistent branding and \\nnaming conventions.\\nProduct Title \\nThe Product Title details the brand, product type, and pack size.\\n50-60 characters is the optimal product title length, as longer titles will \\ntruncate (be shortened, with an ellipses) within the app experience.\\nBrand + Sub-Brand + Product Type + Size or Quantity\\n\\n\\n\\n\\n\\n\\n\\n\\n|Sub|-|Brand|\\n|---|---|---|\"),\n", " Document(id='760124eb-be66-4b0d-8e8a-886d931e3c18', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 6}, page_content='• An item Description highlights the products experience, \\nunique benefits, and added value proposition to entice \\nshoppers.\\n• What we must have:\\n•\\nIn-depth product description\\n•\\nVariety packs require a description for each item included\\n•\\nUse the Product Features Matrix to view mandatory and \\nsuggested information.\\n• Suggestions\\n•\\nSeparate content into shorter paragraphs for easy \\nscanning. 
Introduce sub-headers where appropriate.\\n• DESCRIPTION / PRODUCT DETAILS / MARKETING \\nMESSAGE\\nWhat to avoid:\\n•\\nMisspellings, grammatical errors, incorrect HTML \\nformatting\\n•\\nDo not use exclamation points!\\n•\\nNo CAPS\\n•\\nCompany history that is irrelevant to the product\\n•\\nReferencing flavors, items, or brands not carried at \\nSam’s Club'),\n", " Document(id='2af10e76-2a41-486a-82ab-56f14bf978df', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 0}, page_content=\"• Purpose\\n• This content guide will help you create a best-in-class product page for your items. You'll learn how to build \\nhigh-quality content and deliver a great member experience. 
Refer to this guide during the item setup process \\nwhile completing the Item Setup Template.\\n• This guide addresses the following content fields:\\n•\\nProduct Titles: Defines the specific order and structure your product title should follow in accordance with \\ncopy description guidelines.\\n•\\nProduct Details: Specifies all required and recommended site attributes that contribute to content quality.\\n•\\nImages: Provides examples of expected images based on product type.\\n• Use this guide before content creation and during item setup to answer any content-related needs.\"),\n", " Document(id='03b3389a-8f98-4ec9-a19b-d05a4c3b8c62', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='P R O D U C T F E AT U R E M AT R I X\\nCandy\\nChips, Popcorn & Crackers\\nCookies & Snack Cakes\\nNuts & BFY Snacks\\nMANDATORY\\nFlavor\\nFlavor\\nFlavor\\nFlavor\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / Weight\\nNutrition\\nNutrition\\nNutrition\\nNutrition\\nIngredients\\nIngredients\\nIngredients\\nIngredients\\nBrands\\nBrands\\nBrands\\nBrands\\nComponents\\nComponents\\nComponents\\nComponents\\nShelf Life\\nShelf Life\\nShelf Life\\nShelf Life\\nSUGGESTE\\nD\\nCompany Efforts / Sustainability Initiatives / Seasonality \\nFood Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)\\nSEO SUGGESTIONS\\nCandy\\nSnack(s) / Snack Packs\\nSnack(s) / Snack 
Packs\\nSnack(s) / Snack Packs\\nCandy Bars\\nChips Varity / Variety Chips\\nIndividually Wrapped Snacks'),\n", " Document(id='3f0e6f33-fc5d-470d-9f00-2c7c4381aaa5', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='|**SEO SUGGESTIONS**|Candy|Snack(s) / Snack Packs|Snack(s) / Snack Packs|Snack(s) / Snack Packs|\\n|**SEO SUGGESTIONS**|Candy Bars|Chips Varity / Variety Chips|Individually Wrapped Snacks|Individually Wrapped Snacks|\\n|**SEO SUGGESTIONS**|Bulk Candy|Individually Wrapped Snacks|||\\n|**SEO SUGGESTIONS**|Chocolate / Chocolate Candy||||\\n|**SEO SUGGESTIONS**|Candy Variety||||'),\n", " Document(id='8cf99a36-2668-440a-9abf-c611b0cd906d', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 16}, page_content='17\\n4. Product\\nProduct Images capture -\\n•\\nComponent highlights \\n•\\nFlavors, Textures, Usage, Storage, etc. 
\\n•\\nComponent packaging for Variety Packs \\n•\\nVariety packs will showcase each flavor on one image – \\nsingle product shots will be rejected'),\n", " Document(id='2a09c111-c967-4426-87d3-585e68be874a', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 0}, page_content='• Use this guide before content creation and during item setup to answer any content-related needs.\\n• If you have questions about your product type, please reach out to your merchant or digital merchandising \\npartner.\\n• How to Use This Guide'),\n", " Document(id='c9435a94-f045-4318-ae25-98f592e45b4c', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 14}, page_content='2. 
Nutrition / Ingredients – Variety \\nThis example image does not \\nclearly display the nutrition/ \\ningredient information for the \\nmember.'),\n", " Document(id='be579e7c-42ce-49e3-9c13-d551abb17492', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 4}, page_content='(pcs.)\\n(pcs., pk.) \\n(pk.)\\n(oz.) \\n(lbs.)\\n(oz. / pk.)\\n(ct.)\\nU N I T O F M E A S U R E E X A M P L E S \\n\\n\\n\\n\\n\\n\\n\\n\\n|P L E S|(ct.) (lbs.) (pcs.)
(pcs., pk.)|\\n|---|---|\\n|M||\\n|F M E A S U R E E X A|**(pk.)**
**(oz.)**|\\n|O||\\n|U N I T|**(oz. / pk.)**|'),\n", " Document(id='e7c8d0c7-f2ff-486a-ae6e-ca4092678120', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 9}, page_content='• SPECIFICATIONS / MARKETING MESSAGE \\nSpecifications will be added in IDM under the Marketing Message \\nThe Marketing Message field in IDM will always have 3 entries minimum \\n \\n1. Long Description\\n \\n2. Net Weight, Shelf Life, Pack Size for UOM\\n \\n3. Full Ingredient/ Allergen information\\nSpecs will always be last in the sequence following the Long Description'),\n", " Document(id='1db24527-1303-44cc-980b-d8254b91e1ac', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 8}, page_content='• SPECIFICATIONS / MARKETING MESSAGE \\nSpecifications consist of essential facts about the product displayed in a list of bullets or table \\nformat.\\nAll candy and snack items must include the following 
–\\n1.\\nNet Weight \\n2.\\nShelf Life\\n3.\\nPack Size for UOM\\nThis must be broken out in variety packs\\n4.\\nFull Ingredient List \\nThis must be provided in IDM by supplier\\n5.\\nAllergen + Warnings Information\\n*Variety packs must include breakdowns of ingredient and allergen information for \\neach component.\\nDo not repeat information provided in the Highlights or Description.'),\n", " Document(id='4ec957d7-8a21-4a05-97b7-629cc16b69b2', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 6}, page_content='•\\nNo CAPS\\n•\\nCompany history that is irrelevant to the product\\n•\\nReferencing flavors, items, or brands not carried at \\nSam’s Club\\nDo not repeat information in the Highlights and/or \\nSpecifications.\\nThe Description calls out the product’s competitive advantage, item quantity, and quality details.\\nIt should include details that our members need to know to make a purchase decision.'),\n", " Document(id='a00ee7ca-4ebc-4c1e-a33a-17c03e287d39', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': 
\"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 5}, page_content=\"• HIGHLIGHTS / KEY FEATURES \\nHighlights Outline \\nUse this outline for all PDPs. Follow the exact order. Template attached here.\\n1. Pack Size for Unit of Measure\\n•\\nHow it’s packaged; individually wrapped, resealable container, shareable \\n2. Product Description\\n•\\nFlavor profile and texture (creamy, crispy, crunchy, etc.) \\nVariety Packs\\n•\\nList all included flavors/brands\\n3. How to Enjoy / Who It’s For / Use Cases \\n•\\nSeasonal, Lunches, On the Go, Snacking, \\n4. Misc. Callouts\\n•\\nOrganic, Fair Trade, Kosher, Keto, (X)-Free, No artificial colors/ flavors, etc.\\n•\\nBusiness – vending machine, good for resale \\nHighlights include the product's \\nmost relevant information.\\nWhat Highlights must have:\\n•\\n3-5 bullet point statements \\n(maximum of 5)\\n•\"),\n", " Document(id='3593c89f-83d6-4f17-9913-61208f44bad4', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='|**MANDATORY**|Components|Components|Components|Components|\\n|**MANDATORY**|Shelf Life|Shelf Life|Shelf Life|Shelf Life|\\n|**SUGGESTE**
**D**|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|\\n|**SUGGESTE**
**D**|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|'),\n", " Document(id='a4206354-0f14-42af-862b-65785c4be800', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 10}, page_content='• Candy & Snacks requires four key images to provide \\nmembers with product information clarity. \\n• This includes the sellable unit, nutrition/ ingredients, \\npack size, and product details.\\nIMAGE REQUIREMENTS'),\n", " Document(id='a2786271-e658-4868-9668-10145acfcabf', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 18}, page_content='5. 
Nice to Have – Company Callouts / Product Usage / Lifestyle Graphics'),\n", " Document(id='7d4c070a-b87b-4fae-b38a-267683d4ef84', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 3}, page_content='PRODUCT TITLE / PRODUCT DESCRIPTION \\nGuidelines\\nUnit of Measure is always - \\n•\\nIn lowercase\\n•\\nIn parentheses\\n•\\nSeparated by comma, no slash\\n•\\nExample : belVita Bites Breakfast Biscuits Variety Pack (1 oz., 36 \\npk.)\\nTotal weight of the sellable unit is not included in the Product Title \\nif the component count is listed. Example below.\\nApproved for Candy & Snacks\\npiece\\n(pc.) / (pcs.)\\npack(s) \\n(pk.) / (pks.)\\ncount \\n(ct.) \\nounces\\n(oz.)\\npound(s) \\n(lb.) / (lbs.)\\nUNIT OF MEASURE\\nIdentical component weights = (oz.,pk.) \\nVarying component weight = (pk.) \\nPASS\\nFAIL\\nVarying component weight = (pk.) 
\\nIdentical component weights = (oz.,pk.)'),\n", " Document(id='77697458-a293-493e-a92e-23d661134eb0', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 12}, page_content='1. Sellable Unit\\nSellable Unit images are limited to 1 max.\\nOPTIONS BELOW:\\n(Image A) – Component(s) are not displayed on the sellable unit. Components are shown in the \\nforeground.\\n(Image B) – Component(s) are displayed on the sellable unit.\\nA\\nB'),\n", " Document(id='8454018a-c3c2-4203-8582-256435521696', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='Candy\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nCandy Bars\\nChips Varity / Variety Chips\\nIndividually Wrapped Snacks\\nIndividually Wrapped Snacks\\nBulk Candy\\nIndividually Wrapped Snacks\\nChocolate / Chocolate Candy\\nCandy Variety')],\n", " 'chat_history': [HumanMessage(content='hi', 
additional_kwargs={}, response_metadata={}),\n", " AIMessage(content='hello', additional_kwargs={}, response_metadata={}),\n", " HumanMessage(content='What is RAG?', additional_kwargs={}, response_metadata={}),\n", " AIMessage(content='RAG is a technique to combine retrieval and generation.', additional_kwargs={}, response_metadata={})],\n", " 'answer': 'Retrieval-Augmented Generation'}" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rag_chain.invoke(\n", " input={\n", " \"input\": \"Full form of RAG?\",\n", " \"context\": [Document(page_content=\"This is some random document which contains some random information.\")],\n", " \"chat_history\": [\n", " HumanMessage(\"hi\"),\n", " AIMessage(\"hello\"),\n", " HumanMessage(\"What is RAG?\"),\n", " AIMessage(\"RAG is a technique to combine retrieval and generation.\"),\n", " ]\n", " },\n", " # config={\"configurable\": {\"session_id\": 15}}\n", ")" ] }, { "cell_type": "markdown", "id": "b2d18165", "metadata": {}, "source": [ "### Conv RAG:" ] }, { "cell_type": "code", "execution_count": 63, "id": "5c06d4f2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Created chat hist for session id: `120`\n" ] }, { "data": { "text/plain": [ "{'input': 'Hello, I am Sanchit. 
What abt u?',\n", " 'chat_history': [],\n", " 'context': [Document(id='a2786271-e658-4868-9668-10145acfcabf', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 18}, page_content='5. Nice to Have – Company Callouts / Product Usage / Lifestyle Graphics'),\n", " Document(id='2a09c111-c967-4426-87d3-585e68be874a', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 0}, page_content='• Use this guide before content creation and during item setup to answer any content-related needs.\\n• If you have questions about your product type, please reach out to your merchant or digital merchandising \\npartner.\\n• How to Use This Guide'),\n", " Document(id='3f0e6f33-fc5d-470d-9f00-2c7c4381aaa5', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': 
'/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='|**SEO SUGGESTIONS**|Candy|Snack(s) / Snack Packs|Snack(s) / Snack Packs|Snack(s) / Snack Packs|\\n|**SEO SUGGESTIONS**|Candy Bars|Chips Varity / Variety Chips|Individually Wrapped Snacks|Individually Wrapped Snacks|\\n|**SEO SUGGESTIONS**|Bulk Candy|Individually Wrapped Snacks|||\\n|**SEO SUGGESTIONS**|Chocolate / Chocolate Candy||||\\n|**SEO SUGGESTIONS**|Candy Variety||||'),\n", " Document(id='520dc0e6-0b0c-4e0c-b2bd-033c32c2a39e', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 17}, page_content='5. 
Nice to Have – Dietary + Nutrition Callouts'),\n", " Document(id='3c82eab7-5235-4d72-816f-140a84a00bd0', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 19}, page_content='5. Nice to Have – Pairing Options / Recipes'),\n", " Document(id='03b3389a-8f98-4ec9-a19b-d05a4c3b8c62', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='P R O D U C T F E AT U R E M AT R I X\\nCandy\\nChips, Popcorn & Crackers\\nCookies & Snack Cakes\\nNuts & BFY Snacks\\nMANDATORY\\nFlavor\\nFlavor\\nFlavor\\nFlavor\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / Weight\\nNutrition\\nNutrition\\nNutrition\\nNutrition\\nIngredients\\nIngredients\\nIngredients\\nIngredients\\nBrands\\nBrands\\nBrands\\nBrands\\nComponents\\nComponents\\nComponents\\nComponents\\nShelf Life\\nShelf Life\\nShelf Life\\nShelf Life\\nSUGGESTE\\nD\\nCompany Efforts / Sustainability 
Initiatives / Seasonality \\nFood Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)\\nSEO SUGGESTIONS\\nCandy\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nCandy Bars\\nChips Varity / Variety Chips\\nIndividually Wrapped Snacks'),\n", " Document(id='8454018a-c3c2-4203-8582-256435521696', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='Candy\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nCandy Bars\\nChips Varity / Variety Chips\\nIndividually Wrapped Snacks\\nIndividually Wrapped Snacks\\nBulk Candy\\nIndividually Wrapped Snacks\\nChocolate / Chocolate Candy\\nCandy Variety'),\n", " Document(id='6da57c68-741c-4b60-953c-a9b62ef1c532', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 1}, page_content=\"• PRODUCT TITLE / PRODUCT DESCRIPTION \\nBrand\\nEnter brand name as it 
appears on packaging. Match the brand's casing. \\nExample: M&M's. Include apostrophes. Use consistent branding and \\nnaming conventions.\\nProduct Title \\nThe Product Title details the brand, product type, and pack size.\\n50-60 characters is the optimal product title length, as longer titles will \\ntruncate (be shortened, with an ellipses) within the app experience.\\nBrand + Sub-Brand + Product Type + Size or Quantity\\n\\n\\n\\n\\n\\n\\n\\n\\n|Sub|-|Brand|\\n|---|---|---|\"),\n", " Document(id='be579e7c-42ce-49e3-9c13-d551abb17492', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 4}, page_content='(pcs.)\\n(pcs., pk.) \\n(pk.)\\n(oz.) \\n(lbs.)\\n(oz. / pk.)\\n(ct.)\\nU N I T O F M E A S U R E E X A M P L E S \\n\\n\\n\\n\\n\\n\\n\\n\\n|P L E S|(ct.) (lbs.) (pcs.)
(pcs., pk.)|\\n|---|---|\\n|M||\\n|F M E A S U R E E X A|**(pk.)**
**(oz.)**|\\n|O||\\n|U N I T|**(oz. / pk.)**|'),\n", " Document(id='155759c1-83c0-49a7-afa3-f75c7a939e13', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 2}, page_content='|COMPONENTS IN BAG|PRODUCT TYPE|\\n|---|---|'),\n", " Document(id='c665048c-4150-42a2-924f-ca61128d90ac', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 3}, page_content='|Approved for Candy & Snacks|Col2|\\n|---|---|\\n|piece|(pc.) / (pcs.)|\\n|pack(s)|(pk.) / (pks.)|\\n|count|(ct.)|\\n|ounces|(oz.)|\\n|pound(s)|(lb.) 
/ (lbs.)|'),\n", " Document(id='760124eb-be66-4b0d-8e8a-886d931e3c18', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 6}, page_content='• An item Description highlights the products experience, \\nunique benefits, and added value proposition to entice \\nshoppers.\\n• What we must have:\\n•\\nIn-depth product description\\n•\\nVariety packs require a description for each item included\\n•\\nUse the Product Features Matrix to view mandatory and \\nsuggested information.\\n• Suggestions\\n•\\nSeparate content into shorter paragraphs for easy \\nscanning. 
Introduce sub-headers where appropriate.\\n• DESCRIPTION / PRODUCT DETAILS / MARKETING \\nMESSAGE\\nWhat to avoid:\\n•\\nMisspellings, grammatical errors, incorrect HTML \\nformatting\\n•\\nDo not use exclamation points!\\n•\\nNo CAPS\\n•\\nCompany history that is irrelevant to the product\\n•\\nReferencing flavors, items, or brands not carried at \\nSam’s Club'),\n", " Document(id='c9435a94-f045-4318-ae25-98f592e45b4c', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 14}, page_content='2. Nutrition / Ingredients – Variety \\nThis example image does not \\nclearly display the nutrition/ \\ningredient information for the \\nmember.'),\n", " Document(id='a4206354-0f14-42af-862b-65785c4be800', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 10}, page_content='• Candy & Snacks requires four key images to provide \\nmembers with product information clarity. 
\\n• This includes the sellable unit, nutrition/ ingredients, \\npack size, and product details.\\nIMAGE REQUIREMENTS'),\n", " Document(id='fe52e86f-bb26-457d-a75d-c4b8d23d1d28', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 5}, page_content=\"Highlights include the product's \\nmost relevant information.\\nWhat Highlights must have:\\n•\\n3-5 bullet point statements \\n(maximum of 5)\\n•\\nLess than 65 characters per \\nstatement\\nWhat to avoid:\\n•\\nNO CAPS or periods\\n•\\nNo bolding or special \\nformatting\\n•\\nDo not repeat item name\\n•\\nNo exclamation points!\\n•\\nInputting bullet points in IDM\"),\n", " Document(id='60dbf97e-e584-40a4-acb9-2981e93149d4', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 2}, page_content=\"• PRODUCT TITLE / PRODUCT DESCRIPTION \\nAvoid\\nTitles longer than sixty (60) \\ncharacters will be truncated with \\nan ellipsis (“…”) on the 
Product \\nLanding Page (PLP).\\nGuidelines \\n•\\nDo not list components included in variety pack\\n•\\nExample: M&M's, Twix, Skittles, Starburst and 3 Musketeers Assorted \\nEaster Egg Hunt Candy Variety Pack (200 pc., 4 lbs.)\\n•\\nList season only if applicable\\n•\\nHalloween, Christmas, Valentines Day, Easter, Fourth of July/ Summer \\n•\\nUse Product Type from table below -\\nCOMPONENTS IN BAG\\nPRODUCT TYPE \\nCount Sugar + Chocolate \\nCandy \\nCount Sugar \\nCandy / Sweets\\nChocolate\\nChocolate\\nBrand + Variety Pack, + Season + Product Type + Size or Quantity\\nMars Variety Pack, Easter Egg Hunt Candy (240 pcs.)\\nVARIETY PACK CANDY\"),\n", " Document(id='1db24527-1303-44cc-980b-d8254b91e1ac', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 8}, page_content='• SPECIFICATIONS / MARKETING MESSAGE \\nSpecifications consist of essential facts about the product displayed in a list of bullets or table \\nformat.\\nAll candy and snack items must include the following –\\n1.\\nNet Weight \\n2.\\nShelf Life\\n3.\\nPack Size for UOM\\nThis must be broken out in variety packs\\n4.\\nFull Ingredient List \\nThis must be provided in IDM by supplier\\n5.\\nAllergen + Warnings Information\\n*Variety packs must include breakdowns of ingredient and allergen information for \\neach component.\\nDo not repeat information provided in the Highlights or Description.'),\n", " Document(id='7d4c070a-b87b-4fae-b38a-267683d4ef84', 
metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 3}, page_content='PRODUCT TITLE / PRODUCT DESCRIPTION \\nGuidelines\\nUnit of Measure is always - \\n•\\nIn lowercase\\n•\\nIn parentheses\\n•\\nSeparated by comma, no slash\\n•\\nExample : belVita Bites Breakfast Biscuits Variety Pack (1 oz., 36 \\npk.)\\nTotal weight of the sellable unit is not included in the Product Title \\nif the component count is listed. Example below.\\nApproved for Candy & Snacks\\npiece\\n(pc.) / (pcs.)\\npack(s) \\n(pk.) / (pks.)\\ncount \\n(ct.) \\nounces\\n(oz.)\\npound(s) \\n(lb.) / (lbs.)\\nUNIT OF MEASURE\\nIdentical component weights = (oz.,pk.) \\nVarying component weight = (pk.) \\nPASS\\nFAIL\\nVarying component weight = (pk.) 
\\nIdentical component weights = (oz.,pk.)'),\n", " Document(id='8cf99a36-2668-440a-9abf-c611b0cd906d', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 16}, page_content='17\\n4. Product\\nProduct Images capture -\\n•\\nComponent highlights \\n•\\nFlavors, Textures, Usage, Storage, etc. \\n•\\nComponent packaging for Variety Packs \\n•\\nVariety packs will showcase each flavor on one image – \\nsingle product shots will be rejected'),\n", " Document(id='f72005cd-08f5-4df8-9dea-69692a0187f1', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 2}, page_content='Chocolate\\nBrand + Variety Pack, + Season + Product Type + Size or Quantity\\nMars Variety Pack, Easter Egg Hunt Candy (240 pcs.)\\nVARIETY PACK CANDY\\nNote: The below only pertains to big bag candy')],\n", " 'answer': 'Hello Sanchit, nice to meet you! I am a helpful AI assistant. 
I can answer your questions and provide information based on the documents I have been provided.'}" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "conversational_rag_chain.invoke(\n", " input={\"input\":\"Hello, I am Sanchit. What abt u?\"},\n", " config={\"configurable\":{\"session_id\":120}}\n", ")" ] }, { "cell_type": "code", "execution_count": 64, "id": "faaf1fe2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'input': 'What are popular pets?',\n", " 'chat_history': [HumanMessage(content='Hello, I am Sanchit. What abt u?', additional_kwargs={}, response_metadata={}),\n", " AIMessage(content='Hello Sanchit, nice to meet you! I am a helpful AI assistant. I can answer your questions and provide information based on the documents I have been provided.', additional_kwargs={}, response_metadata={})],\n", " 'context': [Document(id='0d0bb983-0436-4bc3-b1a9-90563eaf1760', metadata={}, page_content='Cats and Dogs are both popular pets.'),\n", " Document(id='52b436ad-2894-4976-bddf-71e18d426228', metadata={}, page_content='Dogs are known for their loyalty and companionship.'),\n", " Document(id='64ab81dd-caa4-4fc6-a802-f8caeb372863', metadata={}, page_content='Dogs are loyal and require more attention.'),\n", " Document(id='63ea4109-e11b-48eb-aac5-cf234a9c3d0e', metadata={}, page_content='Cats are independent and low-maintenance pets.'),\n", " Document(id='6764ac04-e32c-4353-b273-c896c1767b0c', metadata={}, page_content='Cats are great for small living spaces.'),\n", " Document(id='08214f30-26d1-4e5d-9a7a-73ab10d6b64e', metadata={}, page_content='Cats are often seen as aloof and mysterious.'),\n", " Document(id='d311e0cf-31b8-468f-bda0-031f4da0135c', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': 
'/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='|Col1|Candy|Chips, Popcorn & Crackers|Cookies & Snack Cakes|Nuts & BFY Snacks|\\n|---|---|---|---|---|\\n|**MANDATORY**|Flavor|Flavor|Flavor|Flavor|\\n|**MANDATORY**|Quantity / Weight|Quantity / Weight|Quantity / Weight|Quantity / Weight|\\n|**MANDATORY**|Nutrition|Nutrition|Nutrition|Nutrition|\\n|**MANDATORY**|Ingredients|Ingredients|Ingredients|Ingredients|\\n|**MANDATORY**|Brands|Brands|Brands|Brands|\\n|**MANDATORY**|Components|Components|Components|Components|\\n|**MANDATORY**|Shelf Life|Shelf Life|Shelf Life|Shelf Life|'),\n", " Document(id='03b3389a-8f98-4ec9-a19b-d05a4c3b8c62', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='P R O D U C T F E AT U R E M AT R I X\\nCandy\\nChips, Popcorn & Crackers\\nCookies & Snack Cakes\\nNuts & BFY Snacks\\nMANDATORY\\nFlavor\\nFlavor\\nFlavor\\nFlavor\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / 
Weight\\nNutrition\\nNutrition\\nNutrition\\nNutrition\\nIngredients\\nIngredients\\nIngredients\\nIngredients\\nBrands\\nBrands\\nBrands\\nBrands\\nComponents\\nComponents\\nComponents\\nComponents\\nShelf Life\\nShelf Life\\nShelf Life\\nShelf Life\\nSUGGESTE\\nD\\nCompany Efforts / Sustainability Initiatives / Seasonality \\nFood Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)\\nSEO SUGGESTIONS\\nCandy\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nCandy Bars\\nChips Varity / Variety Chips\\nIndividually Wrapped Snacks'),\n", " Document(id='a4206354-0f14-42af-862b-65785c4be800', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 10}, page_content='• Candy & Snacks requires four key images to provide \\nmembers with product information clarity. 
\\n• This includes the sellable unit, nutrition/ ingredients, \\npack size, and product details.\\nIMAGE REQUIREMENTS'),\n", " Document(id='520dc0e6-0b0c-4e0c-b2bd-033c32c2a39e', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 17}, page_content='5. Nice to Have – Dietary + Nutrition Callouts'),\n", " Document(id='a2786271-e658-4868-9668-10145acfcabf', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 18}, page_content='5. 
Nice to Have – Company Callouts / Product Usage / Lifestyle Graphics'),\n", " Document(id='3593c89f-83d6-4f17-9913-61208f44bad4', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='|**MANDATORY**|Components|Components|Components|Components|\\n|**MANDATORY**|Shelf Life|Shelf Life|Shelf Life|Shelf Life|\\n|**SUGGESTE**
**D**|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|\\n|**SUGGESTE**
**D**|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|'),\n", " Document(id='3c82eab7-5235-4d72-816f-140a84a00bd0', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 19}, page_content='5. Nice to Have – Pairing Options / Recipes'),\n", " Document(id='bcf19987-4aa1-44f7-a456-0d600f0b252d', metadata={}, page_content='Cats are NOT AT ALL LOYAL.'),\n", " Document(id='6da57c68-741c-4b60-953c-a9b62ef1c532', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 1}, page_content=\"• PRODUCT TITLE / PRODUCT DESCRIPTION \\nBrand\\nEnter brand name as it appears on packaging. Match the brand's casing. \\nExample: M&M's. 
Include apostrophes. Use consistent branding and \\nnaming conventions.\\nProduct Title \\nThe Product Title details the brand, product type, and pack size.\\n50-60 characters is the optimal product title length, as longer titles will \\ntruncate (be shortened, with an ellipses) within the app experience.\\nBrand + Sub-Brand + Product Type + Size or Quantity\\n\\n\\n\\n\\n\\n\\n\\n\\n|Sub|-|Brand|\\n|---|---|---|\"),\n", " Document(id='760124eb-be66-4b0d-8e8a-886d931e3c18', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 6}, page_content='• An item Description highlights the products experience, \\nunique benefits, and added value proposition to entice \\nshoppers.\\n• What we must have:\\n•\\nIn-depth product description\\n•\\nVariety packs require a description for each item included\\n•\\nUse the Product Features Matrix to view mandatory and \\nsuggested information.\\n• Suggestions\\n•\\nSeparate content into shorter paragraphs for easy \\nscanning. 
Introduce sub-headers where appropriate.\\n• DESCRIPTION / PRODUCT DETAILS / MARKETING \\nMESSAGE\\nWhat to avoid:\\n•\\nMisspellings, grammatical errors, incorrect HTML \\nformatting\\n•\\nDo not use exclamation points!\\n•\\nNo CAPS\\n•\\nCompany history that is irrelevant to the product\\n•\\nReferencing flavors, items, or brands not carried at \\nSam’s Club'),\n", " Document(id='1db24527-1303-44cc-980b-d8254b91e1ac', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 8}, page_content='• SPECIFICATIONS / MARKETING MESSAGE \\nSpecifications consist of essential facts about the product displayed in a list of bullets or table \\nformat.\\nAll candy and snack items must include the following –\\n1.\\nNet Weight \\n2.\\nShelf Life\\n3.\\nPack Size for UOM\\nThis must be broken out in variety packs\\n4.\\nFull Ingredient List \\nThis must be provided in IDM by supplier\\n5.\\nAllergen + Warnings Information\\n*Variety packs must include breakdowns of ingredient and allergen information for \\neach component.\\nDo not repeat information provided in the Highlights or Description.'),\n", " Document(id='155759c1-83c0-49a7-afa3-f75c7a939e13', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': 
'/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 2}, page_content='|COMPONENTS IN BAG|PRODUCT TYPE|\\n|---|---|'),\n", " Document(id='c665048c-4150-42a2-924f-ca61128d90ac', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 3}, page_content='|Approved for Candy & Snacks|Col2|\\n|---|---|\\n|piece|(pc.) / (pcs.)|\\n|pack(s)|(pk.) / (pks.)|\\n|count|(ct.)|\\n|ounces|(oz.)|\\n|pound(s)|(lb.) 
/ (lbs.)|'),\n", " Document(id='8454018a-c3c2-4203-8582-256435521696', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='Candy\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nCandy Bars\\nChips Varity / Variety Chips\\nIndividually Wrapped Snacks\\nIndividually Wrapped Snacks\\nBulk Candy\\nIndividually Wrapped Snacks\\nChocolate / Chocolate Candy\\nCandy Variety')],\n", " 'answer': 'Based on the provided documents, popular pets are Cats and Dogs.'}" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "conversational_rag_chain.invoke(\n", " input={\"input\":\"What are popular pets?\"},\n", " config={\"configurable\":{\"session_id\":120}}\n", ")" ] }, { "cell_type": "code", "execution_count": 65, "id": "f55d823f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'input': 'Describe CATS?',\n", " 'chat_history': [HumanMessage(content='Hello, I am Sanchit. What abt u?', additional_kwargs={}, response_metadata={}),\n", " AIMessage(content='Hello Sanchit, nice to meet you! I am a helpful AI assistant. 
I can answer your questions and provide information based on the documents I have been provided.', additional_kwargs={}, response_metadata={}),\n", " HumanMessage(content='What are popular pets?', additional_kwargs={}, response_metadata={}),\n", " AIMessage(content='Based on the provided documents, popular pets are Cats and Dogs.', additional_kwargs={}, response_metadata={})],\n", " 'context': [Document(id='63ea4109-e11b-48eb-aac5-cf234a9c3d0e', metadata={}, page_content='Cats are independent and low-maintenance pets.'),\n", " Document(id='08214f30-26d1-4e5d-9a7a-73ab10d6b64e', metadata={}, page_content='Cats are often seen as aloof and mysterious.'),\n", " Document(id='6764ac04-e32c-4353-b273-c896c1767b0c', metadata={}, page_content='Cats are great for small living spaces.'),\n", " Document(id='0d0bb983-0436-4bc3-b1a9-90563eaf1760', metadata={}, page_content='Cats and Dogs are both popular pets.'),\n", " Document(id='bcf19987-4aa1-44f7-a456-0d600f0b252d', metadata={}, page_content='Cats are NOT AT ALL LOYAL.'),\n", " Document(id='52b436ad-2894-4976-bddf-71e18d426228', metadata={}, page_content='Dogs are known for their loyalty and companionship.'),\n", " Document(id='64ab81dd-caa4-4fc6-a802-f8caeb372863', metadata={}, page_content='Dogs are loyal and require more attention.'),\n", " Document(id='3593c89f-83d6-4f17-9913-61208f44bad4', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, 
page_content='|**MANDATORY**|Components|Components|Components|Components|\\n|**MANDATORY**|Shelf Life|Shelf Life|Shelf Life|Shelf Life|\\n|**SUGGESTE**
**D**|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|\\n|**SUGGESTE**
**D**|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|'),\n", " Document(id='1db24527-1303-44cc-980b-d8254b91e1ac', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 8}, page_content='• SPECIFICATIONS / MARKETING MESSAGE \\nSpecifications consist of essential facts about the product displayed in a list of bullets or table \\nformat.\\nAll candy and snack items must include the following –\\n1.\\nNet Weight \\n2.\\nShelf Life\\n3.\\nPack Size for UOM\\nThis must be broken out in variety packs\\n4.\\nFull Ingredient List \\nThis must be provided in IDM by supplier\\n5.\\nAllergen + Warnings Information\\n*Variety packs must include breakdowns of ingredient and allergen information for \\neach component.\\nDo not repeat information provided in the Highlights or Description.'),\n", " Document(id='a4206354-0f14-42af-862b-65785c4be800', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': 
'/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 10}, page_content='• Candy & Snacks requires four key images to provide \\nmembers with product information clarity. \\n• This includes the sellable unit, nutrition/ ingredients, \\npack size, and product details.\\nIMAGE REQUIREMENTS'),\n", " Document(id='a00ee7ca-4ebc-4c1e-a33a-17c03e287d39', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 5}, page_content=\"• HIGHLIGHTS / KEY FEATURES \\nHighlights Outline \\nUse this outline for all PDPs. Follow the exact order. Template attached here.\\n1. Pack Size for Unit of Measure\\n•\\nHow it’s packaged; individually wrapped, resealable container, shareable \\n2. Product Description\\n•\\nFlavor profile and texture (creamy, crispy, crunchy, etc.) \\nVariety Packs\\n•\\nList all included flavors/brands\\n3. How to Enjoy / Who It’s For / Use Cases \\n•\\nSeasonal, Lunches, On the Go, Snacking, \\n4. Misc. 
Callouts\\n•\\nOrganic, Fair Trade, Kosher, Keto, (X)-Free, No artificial colors/ flavors, etc.\\n•\\nBusiness – vending machine, good for resale \\nHighlights include the product's \\nmost relevant information.\\nWhat Highlights must have:\\n•\\n3-5 bullet point statements \\n(maximum of 5)\\n•\"),\n", " Document(id='be579e7c-42ce-49e3-9c13-d551abb17492', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 4}, page_content='(pcs.)\\n(pcs., pk.) \\n(pk.)\\n(oz.) \\n(lbs.)\\n(oz. / pk.)\\n(ct.)\\nU N I T O F M E A S U R E E X A M P L E S \\n\\n\\n\\n\\n\\n\\n\\n\\n|P L E S|(ct.) (lbs.) (pcs.)
(pcs., pk.)|\\n|---|---|\\n|M||\\n|F M E A S U R E E X A|**(pk.)**
**(oz.)**|\\n|O||\\n|U N I T|**(oz. / pk.)**|'),\n", " Document(id='d311e0cf-31b8-468f-bda0-031f4da0135c', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='|Col1|Candy|Chips, Popcorn & Crackers|Cookies & Snack Cakes|Nuts & BFY Snacks|\\n|---|---|---|---|---|\\n|**MANDATORY**|Flavor|Flavor|Flavor|Flavor|\\n|**MANDATORY**|Quantity / Weight|Quantity / Weight|Quantity / Weight|Quantity / Weight|\\n|**MANDATORY**|Nutrition|Nutrition|Nutrition|Nutrition|\\n|**MANDATORY**|Ingredients|Ingredients|Ingredients|Ingredients|\\n|**MANDATORY**|Brands|Brands|Brands|Brands|\\n|**MANDATORY**|Components|Components|Components|Components|\\n|**MANDATORY**|Shelf Life|Shelf Life|Shelf Life|Shelf Life|'),\n", " Document(id='2af10e76-2a41-486a-82ab-56f14bf978df', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 0}, page_content=\"• Purpose\\n• This content guide will help you create a 
best-in-class product page for your items. You'll learn how to build \\nhigh-quality content and deliver a great member experience. Refer to this guide during the item setup process \\nwhile completing the Item Setup Template.\\n• This guide addresses the following content fields:\\n•\\nProduct Titles: Defines the specific order and structure your product title should follow in accordance with \\ncopy description guidelines.\\n•\\nProduct Details: Specifies all required and recommended site attributes that contribute to content quality.\\n•\\nImages: Provides examples of expected images based on product type.\\n• Use this guide before content creation and during item setup to answer any content-related needs.\"),\n", " Document(id='6da57c68-741c-4b60-953c-a9b62ef1c532', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 1}, page_content=\"• PRODUCT TITLE / PRODUCT DESCRIPTION \\nBrand\\nEnter brand name as it appears on packaging. Match the brand's casing. \\nExample: M&M's. Include apostrophes. 
Use consistent branding and \\nnaming conventions.\\nProduct Title \\nThe Product Title details the brand, product type, and pack size.\\n50-60 characters is the optimal product title length, as longer titles will \\ntruncate (be shortened, with an ellipses) within the app experience.\\nBrand + Sub-Brand + Product Type + Size or Quantity\\n\\n\\n\\n\\n\\n\\n\\n\\n|Sub|-|Brand|\\n|---|---|---|\"),\n", " Document(id='760124eb-be66-4b0d-8e8a-886d931e3c18', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 6}, page_content='• An item Description highlights the products experience, \\nunique benefits, and added value proposition to entice \\nshoppers.\\n• What we must have:\\n•\\nIn-depth product description\\n•\\nVariety packs require a description for each item included\\n•\\nUse the Product Features Matrix to view mandatory and \\nsuggested information.\\n• Suggestions\\n•\\nSeparate content into shorter paragraphs for easy \\nscanning. 
Introduce sub-headers where appropriate.\\n• DESCRIPTION / PRODUCT DETAILS / MARKETING \\nMESSAGE\\nWhat to avoid:\\n•\\nMisspellings, grammatical errors, incorrect HTML \\nformatting\\n•\\nDo not use exclamation points!\\n•\\nNo CAPS\\n•\\nCompany history that is irrelevant to the product\\n•\\nReferencing flavors, items, or brands not carried at \\nSam’s Club'),\n", " Document(id='155759c1-83c0-49a7-afa3-f75c7a939e13', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 2}, page_content='|COMPONENTS IN BAG|PRODUCT TYPE|\\n|---|---|'),\n", " Document(id='520dc0e6-0b0c-4e0c-b2bd-033c32c2a39e', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 17}, page_content='5. 
Nice to Have – Dietary + Nutrition Callouts'),\n", " Document(id='7d4c070a-b87b-4fae-b38a-267683d4ef84', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 3}, page_content='PRODUCT TITLE / PRODUCT DESCRIPTION \\nGuidelines\\nUnit of Measure is always - \\n•\\nIn lowercase\\n•\\nIn parentheses\\n•\\nSeparated by comma, no slash\\n•\\nExample : belVita Bites Breakfast Biscuits Variety Pack (1 oz., 36 \\npk.)\\nTotal weight of the sellable unit is not included in the Product Title \\nif the component count is listed. Example below.\\nApproved for Candy & Snacks\\npiece\\n(pc.) / (pcs.)\\npack(s) \\n(pk.) / (pks.)\\ncount \\n(ct.) \\nounces\\n(oz.)\\npound(s) \\n(lb.) / (lbs.)\\nUNIT OF MEASURE\\nIdentical component weights = (oz.,pk.) \\nVarying component weight = (pk.) \\nPASS\\nFAIL\\nVarying component weight = (pk.) 
\\nIdentical component weights = (oz.,pk.)'),\n", " Document(id='03b3389a-8f98-4ec9-a19b-d05a4c3b8c62', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='P R O D U C T F E AT U R E M AT R I X\\nCandy\\nChips, Popcorn & Crackers\\nCookies & Snack Cakes\\nNuts & BFY Snacks\\nMANDATORY\\nFlavor\\nFlavor\\nFlavor\\nFlavor\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / Weight\\nNutrition\\nNutrition\\nNutrition\\nNutrition\\nIngredients\\nIngredients\\nIngredients\\nIngredients\\nBrands\\nBrands\\nBrands\\nBrands\\nComponents\\nComponents\\nComponents\\nComponents\\nShelf Life\\nShelf Life\\nShelf Life\\nShelf Life\\nSUGGESTE\\nD\\nCompany Efforts / Sustainability Initiatives / Seasonality \\nFood Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)\\nSEO SUGGESTIONS\\nCandy\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nCandy Bars\\nChips Varity / Variety Chips\\nIndividually Wrapped Snacks')],\n", " 'answer': 'Cats are independent and low-maintenance pets. Cats are often seen as aloof and mysterious. Cats are great for small living spaces. Cats and Dogs are both popular pets. 
Cats are NOT AT ALL LOYAL.'}" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "conversational_rag_chain.invoke(\n", " input={\"input\":\"Describe CATS?\"},\n", " config={\"configurable\":{\"session_id\":120}}\n", ")" ] }, { "cell_type": "code", "execution_count": 66, "id": "e9d17980", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'input': '1. Are they LOYAL? 2. What do I mean by THEY?',\n", " 'chat_history': [HumanMessage(content='Hello, I am Sanchit. What abt u?', additional_kwargs={}, response_metadata={}),\n", " AIMessage(content='Hello Sanchit, nice to meet you! I am a helpful AI assistant. I can answer your questions and provide information based on the documents I have been provided.', additional_kwargs={}, response_metadata={}),\n", " HumanMessage(content='What are popular pets?', additional_kwargs={}, response_metadata={}),\n", " AIMessage(content='Based on the provided documents, popular pets are Cats and Dogs.', additional_kwargs={}, response_metadata={}),\n", " HumanMessage(content='Describe CATS?', additional_kwargs={}, response_metadata={}),\n", " AIMessage(content='Cats are independent and low-maintenance pets. Cats are often seen as aloof and mysterious. Cats are great for small living spaces. Cats and Dogs are both popular pets. 
Cats are NOT AT ALL LOYAL.', additional_kwargs={}, response_metadata={})],\n", " 'context': [Document(id='52b436ad-2894-4976-bddf-71e18d426228', metadata={}, page_content='Dogs are known for their loyalty and companionship.'),\n", " Document(id='0d0bb983-0436-4bc3-b1a9-90563eaf1760', metadata={}, page_content='Cats and Dogs are both popular pets.'),\n", " Document(id='64ab81dd-caa4-4fc6-a802-f8caeb372863', metadata={}, page_content='Dogs are loyal and require more attention.'),\n", " Document(id='bcf19987-4aa1-44f7-a456-0d600f0b252d', metadata={}, page_content='Cats are NOT AT ALL LOYAL.'),\n", " Document(id='63ea4109-e11b-48eb-aac5-cf234a9c3d0e', metadata={}, page_content='Cats are independent and low-maintenance pets.'),\n", " Document(id='08214f30-26d1-4e5d-9a7a-73ab10d6b64e', metadata={}, page_content='Cats are often seen as aloof and mysterious.'),\n", " Document(id='6764ac04-e32c-4353-b273-c896c1767b0c', metadata={}, page_content='Cats are great for small living spaces.'),\n", " Document(id='be579e7c-42ce-49e3-9c13-d551abb17492', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 4}, page_content='(pcs.)\\n(pcs., pk.) \\n(pk.)\\n(oz.) \\n(lbs.)\\n(oz. / pk.)\\n(ct.)\\nU N I T O F M E A S U R E E X A M P L E S \\n\\n\\n\\n\\n\\n\\n\\n\\n|P L E S|(ct.) (lbs.) (pcs.)
(pcs., pk.)|\\n|---|---|\\n|M||\\n|F M E A S U R E E X A|**(pk.)**
**(oz.)**|\\n|O||\\n|U N I T|**(oz. / pk.)**|'),\n", " Document(id='3593c89f-83d6-4f17-9913-61208f44bad4', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='|**MANDATORY**|Components|Components|Components|Components|\\n|**MANDATORY**|Shelf Life|Shelf Life|Shelf Life|Shelf Life|\\n|**SUGGESTE**
**D**|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|Company Efforts / Sustainability Initiatives / Seasonality|\\n|**SUGGESTE**
**D**|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|Food Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)|'),\n", " Document(id='03b3389a-8f98-4ec9-a19b-d05a4c3b8c62', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='P R O D U C T F E AT U R E M AT R I X\\nCandy\\nChips, Popcorn & Crackers\\nCookies & Snack Cakes\\nNuts & BFY Snacks\\nMANDATORY\\nFlavor\\nFlavor\\nFlavor\\nFlavor\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / Weight\\nQuantity / Weight\\nNutrition\\nNutrition\\nNutrition\\nNutrition\\nIngredients\\nIngredients\\nIngredients\\nIngredients\\nBrands\\nBrands\\nBrands\\nBrands\\nComponents\\nComponents\\nComponents\\nComponents\\nShelf Life\\nShelf Life\\nShelf Life\\nShelf Life\\nSUGGESTE\\nD\\nCompany Efforts / Sustainability Initiatives / Seasonality \\nFood Pairings / Recipes / Diet or Ingredient (Keto, Peanut Free, Organic, etc.)\\nSEO SUGGESTIONS\\nCandy\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nCandy Bars\\nChips Varity / Variety Chips\\nIndividually Wrapped Snacks'),\n", " Document(id='4ec957d7-8a21-4a05-97b7-629cc16b69b2', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 
'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 6}, page_content='•\\nNo CAPS\\n•\\nCompany history that is irrelevant to the product\\n•\\nReferencing flavors, items, or brands not carried at \\nSam’s Club\\nDo not repeat information in the Highlights and/or \\nSpecifications.\\nThe Description calls out the product’s competitive advantage, item quantity, and quality details.\\nIt should include details that our members need to know to make a purchase decision.'),\n", " Document(id='c665048c-4150-42a2-924f-ca61128d90ac', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 3}, page_content='|Approved for Candy & Snacks|Col2|\\n|---|---|\\n|piece|(pc.) / (pcs.)|\\n|pack(s)|(pk.) / (pks.)|\\n|count|(ct.)|\\n|ounces|(oz.)|\\n|pound(s)|(lb.) 
/ (lbs.)|'),\n", " Document(id='a2786271-e658-4868-9668-10145acfcabf', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 18}, page_content='5. Nice to Have – Company Callouts / Product Usage / Lifestyle Graphics'),\n", " Document(id='6da57c68-741c-4b60-953c-a9b62ef1c532', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 1}, page_content=\"• PRODUCT TITLE / PRODUCT DESCRIPTION \\nBrand\\nEnter brand name as it appears on packaging. Match the brand's casing. \\nExample: M&M's. Include apostrophes. 
Use consistent branding and \\nnaming conventions.\\nProduct Title \\nThe Product Title details the brand, product type, and pack size.\\n50-60 characters is the optimal product title length, as longer titles will \\ntruncate (be shortened, with an ellipses) within the app experience.\\nBrand + Sub-Brand + Product Type + Size or Quantity\\n\\n\\n\\n\\n\\n\\n\\n\\n|Sub|-|Brand|\\n|---|---|---|\"),\n", " Document(id='c9435a94-f045-4318-ae25-98f592e45b4c', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 14}, page_content='2. 
Nutrition / Ingredients – Variety \\nThis example image does not \\nclearly display the nutrition/ \\ningredient information for the \\nmember.'),\n", " Document(id='d311e0cf-31b8-468f-bda0-031f4da0135c', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='|Col1|Candy|Chips, Popcorn & Crackers|Cookies & Snack Cakes|Nuts & BFY Snacks|\\n|---|---|---|---|---|\\n|**MANDATORY**|Flavor|Flavor|Flavor|Flavor|\\n|**MANDATORY**|Quantity / Weight|Quantity / Weight|Quantity / Weight|Quantity / Weight|\\n|**MANDATORY**|Nutrition|Nutrition|Nutrition|Nutrition|\\n|**MANDATORY**|Ingredients|Ingredients|Ingredients|Ingredients|\\n|**MANDATORY**|Brands|Brands|Brands|Brands|\\n|**MANDATORY**|Components|Components|Components|Components|\\n|**MANDATORY**|Shelf Life|Shelf Life|Shelf Life|Shelf Life|'),\n", " Document(id='7d4c070a-b87b-4fae-b38a-267683d4ef84', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': 
\"D:20240409154109Z00'00'\", 'page': 3}, page_content='PRODUCT TITLE / PRODUCT DESCRIPTION \\nGuidelines\\nUnit of Measure is always - \\n•\\nIn lowercase\\n•\\nIn parentheses\\n•\\nSeparated by comma, no slash\\n•\\nExample : belVita Bites Breakfast Biscuits Variety Pack (1 oz., 36 \\npk.)\\nTotal weight of the sellable unit is not included in the Product Title \\nif the component count is listed. Example below.\\nApproved for Candy & Snacks\\npiece\\n(pc.) / (pcs.)\\npack(s) \\n(pk.) / (pks.)\\ncount \\n(ct.) \\nounces\\n(oz.)\\npound(s) \\n(lb.) / (lbs.)\\nUNIT OF MEASURE\\nIdentical component weights = (oz.,pk.) \\nVarying component weight = (pk.) \\nPASS\\nFAIL\\nVarying component weight = (pk.) \\nIdentical component weights = (oz.,pk.)'),\n", " Document(id='8454018a-c3c2-4203-8582-256435521696', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 7}, page_content='Candy\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nSnack(s) / Snack Packs\\nCandy Bars\\nChips Varity / Variety Chips\\nIndividually Wrapped Snacks\\nIndividually Wrapped Snacks\\nBulk Candy\\nIndividually Wrapped Snacks\\nChocolate / Chocolate Candy\\nCandy Variety'),\n", " Document(id='e5e3c384-622b-4a97-a396-82e0ad7d6987', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': 
'/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 15}, page_content='3. Pack Size for Unit of Measure\\nImages must clearly display the following –\\n• How members will see the item after opening the box / package.\\n• The number of packs / flavors included in a variety pack.'),\n", " Document(id='760124eb-be66-4b0d-8e8a-886d931e3c18', metadata={'producer': 'macOS Version 14.4.1 (Build 23E224) Quartz PDFContext', 'creator': '', 'creationdate': \"D:20240409154109Z00'00'\", 'source': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'file_path': '/Users/neetikasaxena/Documents/sanchit/sample_code/rag-with-gemma3/test_data/candy_snacks.pdf', 'total_pages': 26, 'format': 'PDF 1.4', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': \"D:20240409154109Z00'00'\", 'trapped': '', 'modDate': \"D:20240409154109Z00'00'\", 'creationDate': \"D:20240409154109Z00'00'\", 'page': 6}, page_content='• An item Description highlights the products experience, \\nunique benefits, and added value proposition to entice \\nshoppers.\\n• What we must have:\\n•\\nIn-depth product description\\n•\\nVariety packs require a description for each item included\\n•\\nUse the Product Features Matrix to view mandatory and \\nsuggested information.\\n• Suggestions\\n•\\nSeparate content into shorter paragraphs for easy \\nscanning. 
Introduce sub-headers where appropriate.\\n• DESCRIPTION / PRODUCT DETAILS / MARKETING \\nMESSAGE\\nWhat to avoid:\\n•\\nMisspellings, grammatical errors, incorrect HTML \\nformatting\\n•\\nDo not use exclamation points!\\n•\\nNo CAPS\\n•\\nCompany history that is irrelevant to the product\\n•\\nReferencing flavors, items, or brands not carried at \\nSam’s Club')],\n", " 'answer': 'Based on the documents, Cats are NOT AT ALL LOYAL.\\n\\n“They” refers to Cats.'}" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "conversational_rag_chain.invoke(\n", " input={\"input\":\"1. Are they LOYAL? 2. What do I mean by THEY?\"},\n", " config={\"configurable\":{\"session_id\":120}}\n", ")" ] }, { "cell_type": "markdown", "id": "2c60b28f", "metadata": {}, "source": [ "# Filter data by users:" ] }, { "cell_type": "markdown", "id": "f6b884ab", "metadata": {}, "source": [ "## Add:" ] }, { "cell_type": "code", "execution_count": 102, "id": "ab174727", "metadata": {}, "outputs": [], "source": [ "sun_docs = [\n", " Document(\n", " page_content=\"The Sun is a nearly perfect sphere of hot plasma, at the center of the Solar System.\",\n", " metadata={\"session_id\": \"user_1\"}\n", " ), \n", " Document(\n", " page_content=\"It is composed primarily of hydrogen (about 74%) and helium (about 24%) by mass.\",\n", " metadata={\"session_id\": \"user_1\"}\n", " ),\n", " Document(\n", " page_content=\"The Sun's core temperature reaches approximately 15 million degrees Celsius.\",\n", " metadata={\"session_id\": \"user_1\"}\n", " ),\n", " Document(\n", " page_content=\"Solar flares and sunspots are caused by magnetic field activity on the Sun's surface.\",\n", " metadata={\"session_id\": \"user_1\"}\n", " ),\n", " Document(\n", " page_content=\"Light from the Sun takes about 8 minutes and 20 seconds to reach Earth.\",\n", " metadata={\"session_id\": \"user_1\"}\n", " ),\n", " \n", "]" ] }, { "cell_type": "code", "execution_count": 103, "id": 
"345b4632", "metadata": {}, "outputs": [], "source": [ "moon_docs = [\n", " Document(\n", " page_content=\"The Moon is Earth's only natural satellite and the fifth largest moon in the Solar System.\",\n", " metadata={\"session_id\": \"user_2\"}\n", " ),\n", " Document(\n", " page_content=\"It is about 1/6th the size of Earth and has a diameter of about 3,474 kilometers.\",\n", " metadata={\"session_id\": \"user_2\"}\n", " ),\n", " Document(\n", " page_content=\"The Moon's surface is covered with craters, mountains, and flat plains called maria.\",\n", " metadata={\"session_id\": \"user_2\"}\n", " ),\n", " Document(\n", " page_content=\"The Moon has no atmosphere, which means it cannot support life as we know it.\",\n", " metadata={\"session_id\": \"user_2\"}\n", " ),\n", " Document(\n", " page_content=\"The Moon's gravitational pull affects Earth's tides.\",\n", " metadata={\"session_id\": \"user_2\"}\n", " )\n", "]" ] }, { "cell_type": "code", "execution_count": 69, "id": "a7c746b0", "metadata": {}, "outputs": [], "source": [ "earth_docs = [\n", " Document(\n", " page_content=\"Earth is the third planet from the Sun and the only known planet to support life.\",\n", " metadata={\"session_id\": \"user_3\"}\n", " ),\n", " Document(\n", " page_content=\"It has a diameter of about 12,742 kilometers and is composed of rock and metal.\",\n", " metadata={\"session_id\": \"user_3\"}\n", " ),\n", " Document(\n", " page_content=\"Earth's atmosphere is composed primarily of nitrogen (about 78%) and oxygen (about 21%).\",\n", " metadata={\"session_id\": \"user_3\"}\n", " ),\n", " Document(\n", " page_content=\"The Earth has one natural satellite, the Moon.\",\n", " metadata={\"session_id\": \"user_3\"}\n", " ),\n", " Document(\n", " page_content=\"Earth's surface is covered by about 71% water.\",\n", " metadata={\"session_id\": \"user_3\"}\n", " )\n", "]" ] }, { "cell_type": "code", "execution_count": 70, "id": "92596d55", "metadata": {}, "outputs": [], "source": [ 
"common_docs = [\n", " Document(\n", " page_content=\"Moon revolves around Earth, while Earth revolves around the Sun.\",\n", " metadata={\"session_id\": \"public\"}\n", " ),\n", " Document(\n", " page_content=\"Earth, Sun and Moon all three are part of the Solar System.\",\n", " metadata={\"session_id\": \"public\"}\n", " ),\n", "]" ] }, { "cell_type": "markdown", "id": "c389c8d6", "metadata": {}, "source": [ "## Embed:" ] }, { "cell_type": "code", "execution_count": 107, "id": "af9b7481", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['15b5166e-7dda-404c-8756-817652b7380d',\n", " '71e80cbf-2386-4337-98d6-1f9bc1246074',\n", " '71667b85-67c7-4157-abad-62300dc83e2c',\n", " 'd7f937d7-b4bd-4e31-b065-9fb0d55e53e3',\n", " '047c0ece-de86-427b-841d-d8ef1505efc8']" ] }, "execution_count": 107, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_1_docs = database.add_documents(sun_docs, embedding=embeddings)\n", "user_2_docs = database.add_documents(moon_docs, embedding=embeddings)\n", "user_3_docs = database.add_documents(earth_docs, embedding=embeddings)\n", "public_docs = database.add_documents(common_docs, embedding=embeddings)\n", "user_3_docs" ] }, { "cell_type": "markdown", "id": "7a3c88e1", "metadata": {}, "source": [ "## Retrieve with filters:\n", "- https://python.langchain.com/docs/integrations/vectorstores/faiss/#query-directly" ] }, { "cell_type": "code", "execution_count": 94, "id": "0b74e982", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=, search_type='similarity_score_threshold', search_kwargs={'k': 4, 'score_threshold': 0.05})" ] }, "execution_count": 94, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retriever = database.as_retriever(\n", " search_type=\"similarity_score_threshold\",\n", " search_kwargs={'k': 4, 'score_threshold': 0.05}\n", ")\n", "retriever" ] }, { "cell_type": "code", "execution_count": 100, "id": 
"00b4dcde", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Document(id='3ee59c85-64dd-43fa-ac70-e065f9b1bf2d', metadata={'session_id': 'user_1'}, page_content=\"The Moon is Earth's only natural satellite and the fifth largest moon in the Solar System.\"),\n", " Document(id='cd51a6fb-ab0f-468a-a343-20a81b47af16', metadata={'session_id': 'user_1'}, page_content='Light from the Sun takes about 8 minutes and 20 seconds to reach Earth.'),\n", " Document(id='2542487c-a6b3-402f-9fb7-f1a4a59dad70', metadata={'session_id': 'user_1'}, page_content='The Sun is a nearly perfect sphere of hot plasma, at the center of the Solar System.'),\n", " Document(id='b75633fd-31ce-4b14-9e1a-7c05f2feffa2', metadata={'session_id': 'user_1'}, page_content='It is composed primarily of hydrogen (about 74%) and helium (about 24%) by mass.')]" ] }, "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retriever.invoke(\"Does moon revolve around earth?\", filter={\"session_id\": \"user_1\"})" ] }, { "cell_type": "code", "execution_count": 110, "id": "f0877090", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Document(id='44480fa4-596e-4978-9dc2-cd889aa0c3e9', metadata={'session_id': 'user_2'}, page_content=\"The Moon is Earth's only natural satellite and the fifth largest moon in the Solar System.\"),\n", " Document(id='8aa8194f-c301-4b92-bebc-a114815f9a93', metadata={'session_id': 'user_2'}, page_content=\"The Moon's surface is covered with craters, mountains, and flat plains called maria.\"),\n", " Document(id='cad1d41d-2eb7-49a0-afc9-d1d9e4f4d85f', metadata={'session_id': 'user_2'}, page_content='It is about 1/6th the size of Earth and has a diameter of about 3,474 kilometers.'),\n", " Document(id='774fe30c-762f-41a8-84f6-259ba0817454', metadata={'session_id': 'user_2'}, page_content='The Moon has no atmosphere, which means it cannot support life as we know it.')]" ] }, "execution_count": 110, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "retriever.invoke(\"which is earth's only natural sitelite ?\", filter={\"session_id\": \"user_2\"})" ] }, { "cell_type": "code", "execution_count": 83, "id": "2bf99a19", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Document(id='53f8f17f-0354-4608-9741-c6c06c33b317', metadata={'session_id': 'user_3'}, page_content='Earth is the third planet from the Sun and the only known planet to support life.')]" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retriever.invoke(\"What is the Earth?\", filter={\"session_id\": \"user_3\"})" ] }, { "cell_type": "markdown", "id": "38c9090e", "metadata": {}, "source": [ "## Delete:" ] }, { "cell_type": "code", "execution_count": 106, "id": "e09a0437", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 106, "metadata": {}, "output_type": "execute_result" } ], "source": [ "database.delete(user_2_docs)" ] }, { "cell_type": "code", "execution_count": 85, "id": "95f4ba05", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "No relevant docs were retrieved using the relevance score threshold 0.5\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retriever.invoke(\"What is the Earth?\", filter={\"session_id\": \"user_3\"})" ] }, { "cell_type": "markdown", "id": "33061cab", "metadata": {}, "source": [ "## Multiple Conditional Filters:\n", "- https://github.com/langchain-ai/langchain/discussions/20202" ] }, { "cell_type": "code", "execution_count": 109, "id": "0726bb4a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Document(id='44480fa4-596e-4978-9dc2-cd889aa0c3e9', metadata={'session_id': 'user_2'}, page_content=\"The Moon is Earth's only natural satellite and the fifth largest moon in the Solar System.\"),\n", " Document(id='e4b88b13-4f27-4eba-8f46-3223fd816885', 
metadata={'session_id': 'user_1'}, page_content='Light from the Sun takes about 8 minutes and 20 seconds to reach Earth.'),\n", " Document(id='8aa8194f-c301-4b92-bebc-a114815f9a93', metadata={'session_id': 'user_2'}, page_content=\"The Moon's surface is covered with craters, mountains, and flat plains called maria.\"),\n", " Document(id='cad1d41d-2eb7-49a0-afc9-d1d9e4f4d85f', metadata={'session_id': 'user_2'}, page_content='It is about 1/6th the size of Earth and has a diameter of about 3,474 kilometers.')]" ] }, "execution_count": 109, "metadata": {}, "output_type": "execute_result" } ], "source": [ "retriever.invoke(\n", " input=\"which is earth's only natural sitelite ?\",\n", " filter={\n", " \"$or\": [\n", " {\"session_id\": \"user_2\"},\n", " {\"session_id\": \"user_1\"},\n", " ]\n", " })" ] }, { "cell_type": "markdown", "id": "3825791c", "metadata": {}, "source": [ "> So basically:\n", "- When a user uploads something, assign it an id in the metadata\n", "- Save the ids of the embedded docs user-wise in some database\n", "- Use the filter to restrict the docs to the user's id + public docs\n", "- Once done, use the list of ids from the database to delete the user's data\n", "- Maybe an in-memory SQLite database would be the best option" ] }, { "cell_type": "markdown", "id": "5a1edd66", "metadata": {}, "source": [ "# Testing:\n", "`Note:`\n", "- In this file so far I used 'session_id' as the metadata key\n", "- But in the project it is 'user_id'" ] }, { "cell_type": "markdown", "id": "6ca93353", "metadata": {}, "source": [ "## Loading FAISS (disk) and checking all docs:" ] }, { "cell_type": "code", "execution_count": 113, "id": "1b8fd4d8", "metadata": {}, "outputs": [], "source": [ "from langchain_community.vectorstores import FAISS\n", "from langchain_ollama import OllamaEmbeddings\n", "embeddings = OllamaEmbeddings(model=\"mxbai-embed-large:latest\")\n", "\n", "db = FAISS.load_local(\n", " folder_path=\"./user_faiss\",\n", " index_name=\"index\",\n", " embeddings=embeddings,\n", " 
allow_dangerous_deserialization=True\n", ")" ] }, { "cell_type": "code", "execution_count": 114, "id": "b1681415", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 114, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Sanity check: fetch up to 5 docs visible to this user (own docs + public docs).\n", "db.similarity_search(\n", " query=\"a\",\n", " filter={\n", " \"$or\": [\n", " {\"user_id\": \"nervous_nerd\"},\n", " {\"user_id\": \"public\"}\n", " ]\n", " },\n", " # `k` is a direct keyword argument; the previous `kwargs={\"k\": 5}` was absorbed\n", " # by the method's **kwargs and ignored, so the default k stayed in effect.\n", " k=5\n", ")" ] }, { "cell_type": "code", "execution_count": 115, "id": "b24c0554", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 115, "metadata": {}, "output_type": "execute_result" } ], "source": [ "db.similarity_search(\n", " query=\"ballot\",\n", " filter={\"user_id\": \"nervous_nerd\"}\n", " # Optional extras go in as plain keyword arguments, e.g. k=5, score_threshold=0\n", ")" ] }, { "cell_type": "markdown", "id": "0abe905c", "metadata": {}, "source": [ "## Building As retriever with filters:\n", "+ `Issue is that we are unable to set some filter while invoking RAG chain.`\n", "\n", "- Tried, but failed:\n", " - https://github.com/langchain-ai/langchain/issues/9195#issuecomment-1810893811\n", "- Working Solution:\n", " - https://github.com/langchain-ai/langchain/issues/9195#issuecomment-2095196865" ] }, { "cell_type": "code", "execution_count": 117, "id": "d2cc956b", "metadata": {}, "outputs": [], "source": [ "from langchain_community.vectorstores import FAISS\n", "from langchain_ollama import OllamaEmbeddings\n", "embeddings = OllamaEmbeddings(model=\"mxbai-embed-large:latest\")\n", "\n", "db = FAISS.load_local(\n", " folder_path=\"./user_faiss\",\n", " index_name=\"index\",\n", " embeddings=embeddings,\n", " allow_dangerous_deserialization=True\n", ")\n", "\n", "retriever = db.as_retriever()" ] }, { "cell_type": "code", "execution_count": 118, "id": "0257abb5", "metadata": {}, "outputs": [], "source": [ "from langchain_core.runnables import ConfigurableField\n", "configurable_retriever = 
retriever.configurable_fields(\n", " search_kwargs=ConfigurableField(\n", " id=\"search_kwargs\",\n", " name=\"Search Kwargs\",\n", " description=\"The search kwargs to use\",\n", " )\n", ")" ] }, { "cell_type": "code", "execution_count": 119, "id": "a5979f8f", "metadata": {}, "outputs": [], "source": [ "# Per-invocation retriever settings, keyed by the ConfigurableField id \"search_kwargs\".\n", "config = {\n", " \"configurable\": {\n", " # Only arguments of the vector store's search call belong in here.\n", " # `search_type` is a field of the retriever itself (as_retriever() already\n", " # defaults to similarity search), so it is not repeated as a search kwarg.\n", " \"search_kwargs\": {\n", " \"k\": 5,\n", " # And here comes the hero:\n", " \"filter\": {\n", " \"$or\": [\n", " {\"user_id\": \"curious_cat\"},\n", " {\"user_id\": \"public\"}\n", " ]\n", " },\n", " }\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": 120, "id": "bf550003", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ "configurable_retriever.invoke(\n", " input=\"What is the Sun?\",\n", " config=config\n", ")" ] }, { "cell_type": "markdown", "id": "c0687f3b", "metadata": {}, "source": [] }, { "cell_type": "markdown", "id": "5c40c001", "metadata": {}, "source": [ "
\n", "

Important:

\n", " \n", "
" ] }, { "cell_type": "markdown", "id": "0df51717", "metadata": {}, "source": [ "- Just figured this out.\n", "- If you have an LLM call in the chain but it is still not working, a possible reason is `ChatPromptTemplate`.\n", "- If the history is empty, the template is skipped.\n", "- And, maybe because of that, all further calls are skipped as well!\n", "- So, if the LLM is not getting called, try passing some history manually.\n", "\n", "## My issue\n", "- I was using the \"messages\" key for the chat history,\n", "- because the trimmer expects a \"messages\" key for its input.\n", "- But, somehow, the prompt template was not able to use the \"messages\" key even though it was set explicitly like that.\n", "- Once it was replaced with \"chat_history\", it worked.\n", "- Also, for the output always use the \"answer\" key (in create history-aware retriever in Conversational RAG)" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 5 }