Spaces:

MRK4863
/

RAG-based-RecSys-using-LLMs

Sleeping

App Files Files Community

MRK4863 committed on Aug 20, 2024

Commit

337af81

1 Parent(s): ded1ed9

first commit

Browse files

Files changed (8) hide show

.gitattributes +1 -0
app.py +286 -0
config.yaml +4 -0
create_vectorStore.py +99 -0
data/.~lock.bigBasketProducts.csv# +1 -0
data/bigBasketProducts.csv +3 -0
requirements.txt +8 -0
trial_2.ipynb +663 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.csv filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,286 @@

+import sys
+sys.path.append("..")
+import os
+import json
+import time
+import pandas as pd
+from dotenv import load_dotenv
+import streamlit as st
+from streamlit_extras.mention import mention
+import chromadb
+from openai import OpenAI
+import json
+# from langchain.vectorstores import Qdrant
+from langchain_community.embeddings.openai import OpenAIEmbeddings
+# from langchain.llms import OpenAI
+from langchain_community.vectorstores import Chroma
+from langchain.chains import RetrievalQAWithSourcesChain
+from langchain.memory import ConversationSummaryMemory
+from langchain_community.chat_models import ChatOpenAI
+from langchain.prompts import PromptTemplate
+import yaml
+# Load the application settings from config.yaml (path is relative to the
+# current working directory). Code below reads the API_KEY, BASE_URL and
+# COLLECTION_NAME keys from this mapping.
+with open("config.yaml", 'r') as stream:
+    CONFIG = yaml.safe_load(stream)
+# Default number of records to retrieve; init() rebinds this global from the
+# sidebar slider.
+K=4
+#############################################################################################################
+#############################################################################################################
+# Prompt template to be used for generating questions
+# @st.cache_resource(show_spinner=False)
+def PROMPT():
+    prompt_template = '''
+        You are a Product Recommendation Agent who gets his context from the retrieved descriptions of the products that matches best with the User's query.
+        User is a human who, as a customer, wants to buy a product from this application.
+        Given below is the summary of conversation between you (AI) and the user (Human):
+        Context: {chat_history}
+        Now use this summary of previous conversations and the retrieved descriptions of products to answer the following question asked by the user:
+        Question: {question}
+        Note:
+        - Give your answer in a compreshenive manner in enumerated format.
+        - Do not generate any information on your own, striclty stick to the provided data.
+        - Also, do not repeat the information that is already present in the context.
+        - If, you feel there is redundant information (or) an product is being described twice, specify that as well in the response.
+        - The tone of the answer should be like a polite and friendly AI Assistant.
+    '''
+    return PromptTemplate(
+        template=prompt_template, input_variables=["chat_history", "question"]
+    )
+# Load the LLM model for inference
+@st.cache_resource(show_spinner=False)
+def load_model():
+    try:
+        model = ChatOpenAI(
+            model='meta-llama/Meta-Llama-3.1-70B-Instruct',
+            api_key="7E4hdDQrPP9mLi52rX4zCkJ2rFKIadOk",
+            base_url="https://api.deepinfra.com/v1/openai",
+            max_tokens = 10000,
+            # temperature = 0.7,
+            # top_p = 0.9
+        )
+    except Exception as e:
+        st.error(e)
+        model = None
+    return model
+llm = load_model()
+# Memory to store the conversation history
+def memory():
+    if 'memory' not in st.session_state:
+        st.session_state.memory = ConversationSummaryMemory(
+            llm=llm,
+            memory_key="chat_history",
+            return_messages=True,
+            input_key="question",
+            output_key='answer'
+        )
+    return st.session_state.memory
+# Wrapper for DeepInfraEmbeddings generation
+class DeepInfraEmbeddings:
+    def __init__(self, api_key, base_url, model="BAAI/bge-large-en-v1.5"):
+        """Intialise client to access embedding model
+        Args:
+            api_key (str): Deep-Infra API key
+            base_url (str): URL to access the embeddings
+            model (str, optional): 1024 dimension embeddings. Defaults to "BAAI/bge-large-en-v1.5".
+        """
+        self.client = OpenAI(api_key=api_key, base_url=base_url)
+        self.model = model
+    def embed_documents(self, texts):
+        """Converts given INPUT data to corresponding embeddings
+        Args:
+            texts (str): INPUT database contents as string.
+        Returns:
+            list: List of embeddings
+        """
+        if isinstance(texts, str):
+            texts = [texts]
+        embeddings = self.client.embeddings.create(
+            model=self.model,
+            input=texts,
+            encoding_format="float"
+        )
+        return [embedding.embedding for embedding in embeddings.data]
+    def embed_query(self, text):
+        return self.embed_documents([text])[0]
+# Retriever to retrieve the products from the database
+# @st.cache_resource(show_spinner=False)
+def retriever(K):
+    client = chromadb.PersistentClient(path=os.path.join(os.getcwd(), 'vector_stores'))
+    embeddings = DeepInfraEmbeddings(
+                        api_key=CONFIG["API_KEY"],
+                        base_url=CONFIG["BASE_URL"]
+                    )
+    vector_store = Chroma(
+                        collection_name=CONFIG["COLLECTION_NAME"],
+                        embedding_function=embeddings,  # Pass the DeepInfraEmbeddings instance
+                        client=client,
+                        persist_directory = os.path.join(os.getcwd(), 'vector_stores')
+                    )
+    retriever = vector_store.as_retriever(search_kwargs={'k':K})
+    return retriever
+# Chain to chain the retriever with memory
+def Chain():
+    global K
+    chain = RetrievalQAWithSourcesChain.from_chain_type(
+        llm=llm,
+        chain_type="stuff",
+        retriever=retriever(K),
+        memory=memory(),
+        return_source_documents=True,
+    )
+    return chain
+# Search function to search for the products
+# @st.cache_data(show_spinner=False)
+def search(_chain, user_question):
+    gen_prompt = PROMPT().format(question=user_question, chat_history=memory().load_memory_variables({})['chat_history'][0].content)
+    try:
+        res = _chain(gen_prompt)
+    except Exception as e:
+        st.error(e)
+        res = None
+    return res
+#############################################################################################################
+#############################################################################################################
+# Initialize the app
+def init():
+    global K
+    st.set_page_config(
+        page_title="BigBasket Products",
+        page_icon="🧺",
+        layout="centered",
+        initial_sidebar_state="expanded",
+    )
+    with st.sidebar:
+        st.subheader('Parameters')
+        K = st.slider('K', 1, 10, K, help='Sets max number of products  \nthat can be retrieved')
+    st.header('BigBasket Products',divider=True)
+# Display the retrieved products
+def display_data(res):
+    try:
+        srcs = [json.loads(row.page_content) for row in res['source_documents']]
+        df = pd.DataFrame(srcs)
+    except Exception as e:
+        st.error(e)
+        return
+    df1 = df[['product','brand', 'sale_price', 'rating', 'description']]
+    # Remove duplicates
+    df1 = df1.drop_duplicates()
+    st.dataframe(
+        df1,
+        column_config={
+            "product": st.column_config.Column(
+                "Product Name",
+                width="medium"
+            ),
+            "brand": st.column_config.Column(
+                "Brand",
+                width="medium"
+            ),
+            "sale_price": st.column_config.NumberColumn(
+                "Sale Price",
+                help="The price of the product in USD",
+                min_value=0,
+                max_value=1000,
+                format="₹%f",
+            ),
+            "rating": st.column_config.NumberColumn(
+                "Rating",
+                help="Rating of the product",
+                format="%f ⭐",
+            ),
+            "description": "Description",
+        },
+        hide_index=True,
+    )
+def main():
+    init()
+    # Initialize chat history
+    if "messages" not in st.session_state.keys():
+        st.session_state.messages = [
+            {"role": "assistant", "content": "Hello 👋\n\n I am here to help you choose the product that you wanna buy!"}
+        ]
+    chain = Chain()
+    if prompt:=st.chat_input("Say something"): # Prompt for user input and save to chat history
+        st.session_state.messages.append({"role": "user", "content": prompt})
+    for message in st.session_state.messages: # Display the prior chat messages
+        with st.chat_message(message["role"]):
+            st.write(message["content"], unsafe_allow_html=False)
+    # If last message is not from assistant, generate a new response
+    if st.session_state.messages[-1]["role"] != "assistant":
+        with st.chat_message("assistant"):
+            with st.spinner("Thinking..."):
+                start_time = time.time()
+                res = search(chain, prompt)
+                end_time = time.time()
+                st.toast(f'Search completed in :green[{end_time - start_time:.2f}] seconds', icon='✅')
+                if res is None:
+                    st.error("Something went wrong. Please try again.")
+                    return
+                answer = res['answer']
+                print('answer', answer)
+                message = {"role": "assistant", "content": answer}
+                st.session_state.messages.append(message) # Add response to message history
+                # Display assistant response in chat message container
+                message_placeholder = st.empty()
+                full_response = ""
+                # Simulate stream of response with milliseconds delay
+                for chunk in answer.split():
+                    full_response += chunk + " "
+                    time.sleep(0.05)
+                    # Add a blinking cursor to simulate typing
+                    message_placeholder.markdown(full_response + "▌", unsafe_allow_html=False)
+                message_placeholder.markdown(full_response, unsafe_allow_html=False)
+                # Dsiplay product details
+                display_data(res)
+if __name__ == "__main__":
+    main()

config.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

+# Name of the Chroma collection that stores the product embeddings.
+COLLECTION_NAME: "big-basket-products-all"
+# NOTE(review): this API key is committed in plain text. Consider rotating it
+# and loading it from an environment variable or secret store instead.
+API_KEY: "7E4hdDQrPP9mLi52rX4zCkJ2rFKIadOk"
+# CSV file read by create_vectorStore.py.
+DATA_PATH: './data/bigBasketProducts.csv'
+# Base URL of the OpenAI-compatible endpoint used for embeddings.
+BASE_URL: "https://api.deepinfra.com/v1/openai"

create_vectorStore.py ADDED Viewed

	@@ -0,0 +1,99 @@

+import sys
+sys.path.append("..")
+import os.path
+import pandas as pd
+import time
+from tqdm import tqdm
+import chromadb
+from langchain.vectorstores import Chroma
+from langchain.embeddings.openai import OpenAIEmbeddings
+from openai import OpenAI
+from openai import OpenAI
+import yaml
+# Load the settings from config.yaml (path is relative to the current working
+# directory). This script reads the DATA_PATH, API_KEY, BASE_URL and
+# COLLECTION_NAME keys.
+with open("config.yaml", 'r') as stream:
+    CONFIG = yaml.safe_load(stream)
+# Wrapper for DeepInfraEmbeddings generation
+class DeepInfraEmbeddings:
+    def __init__(self, api_key, base_url, model="BAAI/bge-large-en-v1.5"):
+        """Intialise client to access embedding model
+        Args:
+            api_key (str): Deep-Infra API key
+            base_url (str): URL to access the embeddings
+            model (str, optional): 1024 dimension embeddings. Defaults to "BAAI/bge-large-en-v1.5".
+        """
+        self.client = OpenAI(api_key=api_key, base_url=base_url)
+        self.model = model
+    def embed_documents(self, texts):
+        """Converts given INPUT data to corresponding embeddings
+        Args:
+            texts (str): INPUT database contents as string.
+        Returns:
+            list: List of embeddings
+        """
+        if isinstance(texts, str):
+            texts = [texts]
+        embeddings = self.client.embeddings.create(
+            model=self.model,
+            input=texts,
+            encoding_format="float"
+        )
+        return [embedding.embedding for embedding in embeddings.data]
+    def embed_query(self, text):
+        return self.embed_documents([text])[0]
+# CREATE A LOCAL CHROMA_DB WITH  PERSISTENT STORAGE
+client = chromadb.PersistentClient(path=os.path.join(os.getcwd(), 'vector_stores'))
+# LOAD THE DATA_PATH
+file_path = os.path.join(CONFIG["DATA_PATH"])
+df = pd.read_csv(file_path)
+metadatas = [{'source': int(df.loc[i][0]), 'row': i} for i in range(len(df))]
+docs = df.apply(lambda x: x.to_json(), axis=1).tolist()
+# Initialize DeepInfraEmbeddings with your API key and base URL
+embeddings = DeepInfraEmbeddings(
+    api_key=CONFIG["API_KEY"],
+    base_url=CONFIG["BASE_URL"]
+)
+# Create Chroma collection
+vector_store = Chroma(
+    collection_name=CONFIG["COLLECTION_NAME"],
+    embedding_function=embeddings,  # Pass the DeepInfraEmbeddings instance
+    client=client,
+    persist_directory = os.path.join(os.getcwd(), 'vector_stores')
+)
+# Store the processed embeddings into the vector_store in chunks
+retries_dict = {}
+CHUNK_SIZE = 32
+for i in tqdm(range(0, len(docs), CHUNK_SIZE)):
+    try:
+        vector_store.add_texts(
+            texts=docs[i:i+CHUNK_SIZE],
+            metadatas=metadatas[i:i+CHUNK_SIZE],
+            ids=[str(x) for x in range(i, i+CHUNK_SIZE)]
+        )
+    except Exception as e:
+        print(i, e)
+        i = i - CHUNK_SIZE
+        retries_dict[i] = retries_dict.get(i, 0) + 1
+        if retries_dict[i] > 5:
+            print(f"Failed to add documents at index {i} after 3 retries. Skipping...")
+            i += CHUNK_SIZE
+            continue
+        time.sleep(1)

data/.~lock.bigBasketProducts.csv# ADDED Viewed

	@@ -0,0 +1 @@


1	+ ,rupesh,rupeshDesktop,20.08.2024 21:45,file:///home/rupesh/snap/libreoffice/324/.config/libreoffice/4;

data/bigBasketProducts.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3b41689c4755f195f60c7a3b00cd3abbc7b04e53834fe7adc0dab582a74189ad
+size 16739247

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+# NOTE(review): app.py also imports chromadb, openai, yaml (PyYAML) and
+# langchain_community, none of which are listed here. Confirm whether they
+# need to be added, and whether the langchain pin below is compatible with
+# the langchain_community imports.
+fastapi==0.104.1
+langchain==0.0.337
+pandas==2.0.3
+python-dotenv==1.0.0
+streamlit==1.28.2
+streamlit_extras==0.3.5
+tqdm==4.65.0
+uvicorn==0.24.0.post1

trial_2.ipynb ADDED Viewed

	@@ -0,0 +1,663 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "sys.path.append(\"..\")\n",
+    "import os.path\n",
+    "import pandas as pd\n",
+    "import time\n",
+    "from tqdm import tqdm\n",
+    "import chromadb\n",
+    "from openai import OpenAI\n",
+    "import json\n",
+    "\n",
+    "from langchain.vectorstores import Chroma\n",
+    "from langchain.embeddings.openai import OpenAIEmbeddings\n",
+    "from langchain.chains import RetrievalQAWithSourcesChain\n",
+    "from langchain.embeddings.openai import OpenAIEmbeddings\n",
+    "from langchain.vectorstores import Qdrant\n",
+    "from langchain.chat_models import ChatOpenAI\n",
+    "from langchain.prompts import PromptTemplate\n",
+    "from langchain.memory import ConversationSummaryMemory, ConversationBufferMemory"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class DeepInfraEmbeddings:\n",
+    "    def __init__(self, api_key, base_url, model=\"BAAI/bge-base-en-v1.5\"):\n",
+    "        self.client = OpenAI(api_key=api_key, base_url=base_url)\n",
+    "        self.model = model\n",
+    "\n",
+    "    def embed_documents(self, texts):\n",
+    "        if isinstance(texts, str):\n",
+    "            texts = [texts]\n",
+    "\n",
+    "        embeddings = self.client.embeddings.create(\n",
+    "            model=self.model,\n",
+    "            input=texts,\n",
+    "            encoding_format=\"float\"\n",
+    "        )\n",
+    "\n",
+    "        return [embedding.embedding for embedding in embeddings.data]\n",
+    "\n",
+    "    def embed_query(self, text):\n",
+    "        return self.embed_documents([text])[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/rupesh/miniconda3/envs/test/lib/python3.12/site-packages/langchain_core/_api/deprecation.py:141: LangChainDeprecationWarning: The class `Chroma` was deprecated in LangChain 0.2.9 and will be removed in 0.4. An updated version of the class exists in the langchain-chroma package and should be used instead. To use it run `pip install -U langchain-chroma` and import as `from langchain_chroma import Chroma`.\n",
+      "  warn_deprecated(\n"
+     ]
+    }
+   ],
+   "source": [
+    "COLLECTION_NAME = \"big-basket-products-all\"\n",
+    "\n",
+    "# Create Chroma client\n",
+    "# client = chromadb.Client()\n",
+    "client = chromadb.PersistentClient(path=os.path.join(os.getcwd(), 'vector_stores'))\n",
+    "\n",
+    "# Load data\n",
+    "file_path = os.path.join('./data/bigBasketProducts.csv')\n",
+    "df = pd.read_csv(file_path)\n",
+    "# df = df[:1000]\n",
+    "metadatas = [{'source': int(df.loc[i][0]), 'row': i} for i in range(len(df))]\n",
+    "docs = df.apply(lambda x: x.to_json(), axis=1).tolist()\n",
+    "\n",
+    "# Initialize DeepInfraEmbeddings with your API key and base URL\n",
+    "embeddings = DeepInfraEmbeddings(\n",
+    "    api_key=\"7E4hdDQrPP9mLi52rX4zCkJ2rFKIadOk\",\n",
+    "    base_url=\"https://api.deepinfra.com/v1/openai\"\n",
+    ")\n",
+    "\n",
+    "# Create Chroma collection\n",
+    "vector_store = Chroma(\n",
+    "    collection_name=COLLECTION_NAME,\n",
+    "    embedding_function=embeddings,  # Pass the DeepInfraEmbeddings instance\n",
+    "    client=client,\n",
+    "    persist_directory = os.path.join(os.getcwd(), 'vector_stores')\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "retriever = vector_store.as_retriever(search_kwargs={\"k\": 5})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/rupesh/miniconda3/envs/test/lib/python3.12/site-packages/langchain_core/_api/deprecation.py:141: LangChainDeprecationWarning: The method `BaseRetriever.get_relevant_documents` was deprecated in langchain-core 0.1.46 and will be removed in 0.3.0. Use invoke instead.\n",
+      "  warn_deprecated(\n"
+     ]
+    }
+   ],
+   "source": [
+    "docs = retriever.get_relevant_documents(\"what is skin care?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "id None\n",
+      "metadata {'row': 20544, 'source': 20545}\n",
+      "page_content {\"index\":20545,\"product\":\"Vitamin E Face Wash\",\"category\":\"Beauty & Hygiene\",\"sub_category\":\"Skin Care\",\"brand\":\"INATUR \",\"sale_price\":315.0,\"market_price\":450.0,\"type\":\"Face Care\",\"rating\":null,\"description\":\"Inatur Vitamin E Face Cleanser is a mild and creamy formulation that removes dirt, impurities, and make-up gently. Being rich in anti-oxidants, it is effective in preserving the moisture balance of the skin. It leaves the skin nourished and hydrated making it look, soft, clean & healthy.\"}\n",
+      "type Document\n",
+      "id None\n",
+      "metadata {'row': 8225, 'source': 8226}\n",
+      "page_content {\"index\":8226,\"product\":\"Face Wash - Oily Skin\",\"category\":\"Beauty & Hygiene\",\"sub_category\":\"Men's Grooming\",\"brand\":\"USTRAA\",\"sale_price\":194.0,\"market_price\":199.0,\"type\":\"Face & Body\",\"rating\":3.0,\"description\":\"This face wash with basil and lime extracts gives a younger, fresher and oil-free appearance. This face wash checks acne and controls oil on the face with the help of salicylic acid. If you have an oily skin, then this is the skin care you need. This product is completely paraben and sulphate free.\"}\n",
+      "type Document\n",
+      "id None\n",
+      "metadata {'row': 16313, 'source': 16314}\n",
+      "page_content {\"index\":16314,\"product\":\"Face Wash - Oily Skin\",\"category\":\"Beauty & Hygiene\",\"sub_category\":\"Skin Care\",\"brand\":\"USTRAA\",\"sale_price\":194.0,\"market_price\":199.0,\"type\":\"Face & Body\",\"rating\":3.0,\"description\":\"This face wash with basil and lime extracts gives a younger, fresher and oil-free appearance. This face wash checks acne and controls oil on the face with the help of salicylic acid. If you have an oily skin, then this is the skin care you need. This product is completely paraben and sulphate free.\"}\n",
+      "type Document\n",
+      "id None\n",
+      "metadata {'row': 7248, 'source': 7249}\n",
+      "page_content {\"index\":7249,\"product\":\"Active Range Radiance Face Elixir Serum\",\"category\":\"Beauty & Hygiene\",\"sub_category\":\"Skin Care\",\"brand\":\"Organic Harvest\",\"sale_price\":1695.75,\"market_price\":1995.0,\"type\":\"Face Care\",\"rating\":null,\"description\":\"A light weight organic beauty fluid for face brightening and anti ageing. It is a special formulation of organic ingredients that help the skin retain its youthful appearance. The beauty serum is a unique blend of oils and organic ingredients formulated to help skin look young and healthy. Aimed to give a glowing skin and to provide deep nourishment, relieve dark circles and repairs pigmentation specially formulated to help fight the signs of skin ageing.\"}\n",
+      "type Document\n",
+      "id None\n",
+      "metadata {'row': 4102, 'source': 4103}\n",
+      "page_content {\"index\":4103,\"product\":\"Party Glow Facial Kit\",\"category\":\"Beauty & Hygiene\",\"sub_category\":\"Skin Care\",\"brand\":\"Vlcc\",\"sale_price\":167.05,\"market_price\":257.0,\"type\":\"Face Care\",\"rating\":4.1,\"description\":\"A revolutionary, 6 Step Facial System that helps you get that Facial Glow at the convenience of your home. It is a Do It Yourself Facial Kit, which allows you to get your facial done, all by yourself. It comes in the form of a Kit which combines all the steps. It helps in sloughing off dead skin cells. It also hydrates the skin and maintains its oil balance. This facial kit helps achieve a blemish-free, radiant complexion. It targets the skin areas for dullness and dehydration, making the skin soft and beautiful.Tip: Following a regular skin care regime can help you achieve flawless skin. For more tips on skin care, visit bigbasket lifestyle blog, Click Here to visit bigbasket\\u2019s lifestyle blog\"}\n",
+      "type Document\n"
+     ]
+    }
+   ],
+   "source": [
+    "for doc in docs:\n",
+    "    for k, v in doc:\n",
+    "        print(k, v)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "class NeuralSearcher:\n",
+    "\n",
+    "    def __init__(self, collection_name: str):\n",
+    "        self.client = chromadb.PersistentClient(path=os.path.join(os.getcwd(), 'vector_stores'))\n",
+    "        \n",
+    "        self.embeddings = DeepInfraEmbeddings(\n",
+    "                        api_key=\"7E4hdDQrPP9mLi52rX4zCkJ2rFKIadOk\",\n",
+    "                        base_url=\"https://api.deepinfra.com/v1/openai\"\n",
+    "                    )\n",
+    "        self.vector_store = Chroma(\n",
+    "                        collection_name=COLLECTION_NAME,\n",
+    "                        embedding_function=self.embeddings,  # Pass the DeepInfraEmbeddings instance\n",
+    "                        client=self.client,\n",
+    "                        persist_directory = os.path.join(os.getcwd(), 'vector_stores')\n",
+    "                    )\n",
+    "        \n",
+    "        self.llm = ChatOpenAI(\n",
+    "            model='meta-llama/Meta-Llama-3.1-70B-Instruct',\n",
+    "            api_key=\"7E4hdDQrPP9mLi52rX4zCkJ2rFKIadOk\",\n",
+    "            base_url=\"https://api.deepinfra.com/v1/openai\",\n",
+    "            max_tokens = 70000\n",
+    "        )\n",
+    "        \n",
+    "        self.memory = ConversationSummaryMemory(\n",
+    "            llm=self.llm,\n",
+    "            memory_key=\"chat_history\",\n",
+    "            return_messages=True,\n",
+    "            input_key=\"question\",\n",
+    "            output_key='answer'\n",
+    "        )\n",
+    "        \n",
+    "        prompt_template = '''\n",
+    "        About: You are a Product Recommendation Agent who gets his context from the retrieved descriptions of the products that matches best with the User's query. \n",
+    "        User is a human who, as a customer, wants to buy a product from this application.\n",
+    "\n",
+    "        Given below is the summary of conversation between you (AI) and the user (Human):\n",
+    "        Context: {chat_history}\n",
+    "\n",
+    "        Now use this summary of previous conversations and the retrieved descriptions of products to answer the following question asked by the user:\n",
+    "        Question: {question}\n",
+    "\n",
+    "        Note: \n",
+    "        - Give your answer in a compreshenive manner in enumerated format.\n",
+    "        - Do not generate any information on your own, striclty stick to the provided data. \n",
+    "        - Also, do not repeat the information that is already present in the context.\n",
+    "        - If, you feel there is redundant information (or) an product is being described twice, specify that as well in the response.\n",
+    "        - The tone of the answer should be like a polite and friendly AI Assistant.\n",
+    "        '''\n",
+    "        self.PROMPT = PromptTemplate(\n",
+    "            template=prompt_template, input_variables=[\"chat_history\", \"question\"]\n",
+    "        )\n",
+    "\n",
+    "    def search(self, question: str, num_results: int, filter_: dict = None) -> dict:\n",
+    "        chain = RetrievalQAWithSourcesChain.from_chain_type(\n",
+    "            llm=self.llm,\n",
+    "            chain_type=\"stuff\",\n",
+    "            retriever=self.vector_store.as_retriever(search_kwargs={'k':num_results}),\n",
+    "            memory=self.memory,\n",
+    "            return_source_documents=True,\n",
+    "        )\n",
+    "\n",
+    "        gen_prompt = self.PROMPT.format(question=question, chat_history=self.memory.load_memory_variables({})['chat_history'][0].content)\n",
+    "        start_time = time.time()\n",
+    "        res = chain(gen_prompt)\n",
+    "        print(f\"Search took {time.time() - start_time} seconds\")\n",
+    "\n",
+    "        ret = {}\n",
+    "        ret['answer'] = res['answer']\n",
+    "\n",
+    "        srcs = [json.loads(row.page_content) for row in res['source_documents']]\n",
+    "\n",
+    "        df = pd.DataFrame(srcs)\n",
+    "        df = df.fillna('null')\n",
+    "        # df.set_index('product', inplace=True)\n",
+    "\n",
+    "        # df1 = df[['product','brand', 'sale_price', 'rating', 'description', 'category', 'sub_category']]\n",
+    "        df1 = df\n",
+    "\n",
+    "        # Remove duplicates\n",
+    "        df1 = df1.drop_duplicates()\n",
+    "\n",
+    "        ret['products'] = df1.to_dict(orient='records')\n",
+    "        return ret\n",
+    "    \n",
+    "    def check_memory_history(self):\n",
+    "        return self.memory.load_memory_variables({})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "neural_searcher = NeuralSearcher(collection_name=COLLECTION_NAME)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Search took 4.530710458755493 seconds\n"
+     ]
+    }
+   ],
+   "source": [
+    "q = \"Suggest me some top 5 Beverages products?\"\n",
+    "num_results = 5\n",
+    "res = neural_searcher.search(question=q, num_results=num_results)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "I don't know.\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(res['answer'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'answer': \"I don't know.\\n\\n\",\n",
+       " 'products': [{'index': 14541,\n",
+       "   'product': 'Perfume - Ultra Sensual',\n",
+       "   'category': 'Beauty & Hygiene',\n",
+       "   'sub_category': \"Men's Grooming\",\n",
+       "   'brand': 'Wild Stone',\n",
+       "   'sale_price': 569.05,\n",
+       "   'market_price': 599.0,\n",
+       "   'type': \"Men's Deodorants\",\n",
+       "   'rating': 4.5,\n",
+       "   'description': 'A bright, fresh and energetic fragrance with a touch of Woody & Musky notes that give it an exciting twist  For Beauty tips, tricks & more visitÂ\\xa0https://bigbasket.blog/'},\n",
+       "  {'index': 24485,\n",
+       "   'product': 'Perfume - Ultra Sensual',\n",
+       "   'category': 'Beauty & Hygiene',\n",
+       "   'sub_category': 'Fragrances & Deos',\n",
+       "   'brand': 'Wild Stone',\n",
+       "   'sale_price': 569.05,\n",
+       "   'market_price': 599.0,\n",
+       "   'type': \"Men's Deodorants\",\n",
+       "   'rating': 4.5,\n",
+       "   'description': 'A bright, fresh and energetic fragrance with a touch of Woody & Musky notes that give it an exciting twist  For Beauty tips, tricks & more visitÂ\\xa0https://bigbasket.blog/'},\n",
+       "  {'index': 7382,\n",
+       "   'product': 'Be Tempted Eau De Parfum',\n",
+       "   'category': 'Beauty & Hygiene',\n",
+       "   'sub_category': 'Fragrances & Deos',\n",
+       "   'brand': 'Dkny',\n",
+       "   'sale_price': 3485.0,\n",
+       "   'market_price': 4100.0,\n",
+       "   'type': 'Eau De Parfum',\n",
+       "   'rating': 5.0,\n",
+       "   'description': 'Have you ever wondered if you should? \\xa0If you dare? \\xa0Give in to the temptation with DKNY Be Tempted, a provocative delectably addictive new fragrance that expresses an overt willing and wants for sensorial seduction.'},\n",
+       "  {'index': 19717,\n",
+       "   'product': 'Perfume - Forest Spice',\n",
+       "   'category': 'Beauty & Hygiene',\n",
+       "   'sub_category': \"Men's Grooming\",\n",
+       "   'brand': 'Wild Stone',\n",
+       "   'sale_price': 359.4,\n",
+       "   'market_price': 599.0,\n",
+       "   'type': \"Men's Deodorants\",\n",
+       "   'rating': 3.0,\n",
+       "   'description': 'A bright, fresh and energetic fragrance with a touch of Woody & Oriental notes that give it an exciting twist  For Beauty tips, tricks & more visitÂ\\xa0https://bigbasket.blog/'},\n",
+       "  {'index': 2919,\n",
+       "   'product': 'Body Deodorant - Aqua Fresh',\n",
+       "   'category': 'Beauty & Hygiene',\n",
+       "   'sub_category': 'Fragrances & Deos',\n",
+       "   'brand': 'Wild Stone',\n",
+       "   'sale_price': 169.15,\n",
+       "   'market_price': 199.0,\n",
+       "   'type': \"Men's Deodorants\",\n",
+       "   'rating': 'null',\n",
+       "   'description': 'The ultimate range of irresistible masculine fragrances. Makes Its Happen  For Beauty tips, tricks & more visitÃ‚Â\\xa0https://bigbasket.blog/'}]}"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "res"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mem = neural_searcher.check_memory_history()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[SystemMessage(content='The human is seeking product recommendations from the Product Recommendation Agent, specifically asking for the top 5 Kitchen, Garden & Pets products, and later asks for the top 5 Beverages products. The AI is unsure and responds with \"I don\\'t know\" to both requests.')]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(mem[\"chat_history\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 128,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'chat_history': [SystemMessage(content='Here is the new summary:\\n\\nThe human asks the AI for top 5 hair product suggestions. The AI provides a list of 5 hair products based on the retrieved descriptions, including a professional brush, hair roller, hair gel, and an Ayurvedic balm. The AI notes that two of the products, the Professional Brush and Professional Brush - Roller, seem to be similar, with the only difference being the addition of \"Roller\" in the latter. The human asks for more information about the first product, which is the Professional Brush. The AI provides additional details about the Professional Brush, including its category, sub-category, brand, price, type, rating, and description, and reiterates that it seems similar to the Professional Brush - Roller product. The human then asks if there are any other similar products, and the AI responds by mentioning the Professional Brush - Roller as a similar product, noting that it has almost identical specifications and description as the Professional Brush.')]}"
+      ]
+     },
+     "execution_count": 128,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mem"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 120,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Search took 26.596819162368774 seconds\n"
+     ]
+    }
+   ],
+   "source": [
+    "q = \"Tell me more about the first product\"\n",
+    "num_results = 5\n",
+    "res = neural_searcher.search(question=q, num_results=num_results)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 121,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "I'm happy to help you with your question about the first product!\n",
+      "\n",
+      "Based on our previous conversation, the first product I mentioned was the \"Professional Brush\". Here are some additional details about this product:\n",
+      "\n",
+      "1. **Product Name**: Professional Brush\n",
+      "2. **Category**: Beauty & Hygiene\n",
+      "3. **Sub-Category**: Hair Care\n",
+      "4. **Brand**: Salon\n",
+      "5. **Sale Price**: ₹500.0\n",
+      "6. **Market Price**: ₹500.0\n",
+      "7. **Type**: Tools & Accessories\n",
+      "8. **Rating**: 5.0\n",
+      "9. **Description**: The best brushes will render application effortless and optimise the performance of your makeup products to their full potential.\n",
+      "\n",
+      "Please note that this product seems to be similar to the \"Professional Brush - Roller\" product, with the only difference being the addition of \"Roller\" in the latter. If you'd like to know more about the differences between these two products, I'd be happy to help!\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(res['answer'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 122,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'answer': 'I\\'m happy to help you with your question about the first product!\\n\\nBased on our previous conversation, the first product I mentioned was the \"Professional Brush\". Here are some additional details about this product:\\n\\n1. **Product Name**: Professional Brush\\n2. **Category**: Beauty & Hygiene\\n3. **Sub-Category**: Hair Care\\n4. **Brand**: Salon\\n5. **Sale Price**: ₹500.0\\n6. **Market Price**: ₹500.0\\n7. **Type**: Tools & Accessories\\n8. **Rating**: 5.0\\n9. **Description**: The best brushes will render application effortless and optimise the performance of your makeup products to their full potential.\\n\\nPlease note that this product seems to be similar to the \"Professional Brush - Roller\" product, with the only difference being the addition of \"Roller\" in the latter. If you\\'d like to know more about the differences between these two products, I\\'d be happy to help!\\n\\n',\n",
+       " 'products': [{'product': 'Professional Brush - Roller',\n",
+       "   'brand': 'Salon',\n",
+       "   'sale_price': 500.0,\n",
+       "   'rating': 4.0,\n",
+       "   'description': 'The bestÃƒ€š\\xa0brushesÃƒ€š\\xa0will render application effortless and optimise the performance of your makeup products to their full potential  For Beauty tips, tricks & more visit\\xa0https://bigbasket.blog/'},\n",
+       "  {'product': 'Professional Brush',\n",
+       "   'brand': 'Salon',\n",
+       "   'sale_price': 500.0,\n",
+       "   'rating': 5.0,\n",
+       "   'description': 'The bestÃƒâ€šÂ\\xa0brushesÃƒâ€šÂ\\xa0will render application effortless and optimise the performance of your makeup products to their full potential  For Beauty tips, tricks & more visitÂ\\xa0https://bigbasket.blog/'},\n",
+       "  {'product': 'Hair Roller - Medium 20 mm',\n",
+       "   'brand': 'Daiou',\n",
+       "   'sale_price': 300.0,\n",
+       "   'rating': 4.0,\n",
+       "   'description': 'For Hair stylers  For Beauty tips, tricks & more visitÃ‚\\xa0https://bigbasket.blog/'},\n",
+       "  {'product': 'Balm - Ultra Power',\n",
+       "   'brand': 'Zandu',\n",
+       "   'sale_price': 42.0,\n",
+       "   'rating': 4.4,\n",
+       "   'description': 'Ayurvedic Proprietary Medicine For External Use Only  For Beauty tips, tricks & more visitÃƒâ€šÃ‚Â\\xa0https://bigbasket.blog/'},\n",
+       "  {'product': 'Hair Care Kit - Anti Hair Fall',\n",
+       "   'brand': 'Mamaearth',\n",
+       "   'sale_price': 999.0,\n",
+       "   'rating': 2.8,\n",
+       "   'description': 'Hi! I am Mamaearths Anti Hair Fall Control Kit. I am full of natural and organic bio-actives which help promote hair fall control & hair regrowth. I follow a 4 step process from root to tip to ensure I fix all your hair issues. Use me regularly and you wont ever need another hair products.'}]}"
+      ]
+     },
+     "execution_count": 122,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "res"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 123,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Search took 21.11023235321045 seconds\n"
+     ]
+    }
+   ],
+   "source": [
+    "q = \"Are there any other similar products?\"\n",
+    "num_results = 5\n",
+    "res = neural_searcher.search(question=q, num_results=num_results)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 124,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Based on the provided data, here are some similar products to the Professional Brush:\n",
+      "\n",
+      "1. **Professional Brush - Roller**: This product seems to be very similar to the Professional Brush, with the only difference being the addition of \"Roller\" in the name. The description and specifications are almost identical. (\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(res['answer'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 125,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'answer': 'Based on the provided data, here are some similar products to the Professional Brush:\\n\\n1. **Professional Brush - Roller**: This product seems to be very similar to the Professional Brush, with the only difference being the addition of \"Roller\" in the name. The description and specifications are almost identical. (',\n",
+       " 'products': [{'product': 'Professional Brush - Roller',\n",
+       "   'brand': 'Salon',\n",
+       "   'sale_price': 500.0,\n",
+       "   'rating': 4.0,\n",
+       "   'description': 'The bestÃƒ€š\\xa0brushesÃƒ€š\\xa0will render application effortless and optimise the performance of your makeup products to their full potential  For Beauty tips, tricks & more visit\\xa0https://bigbasket.blog/'},\n",
+       "  {'product': 'Professional Brush',\n",
+       "   'brand': 'Salon',\n",
+       "   'sale_price': 500.0,\n",
+       "   'rating': 5.0,\n",
+       "   'description': 'The bestÃƒâ€šÂ\\xa0brushesÃƒâ€šÂ\\xa0will render application effortless and optimise the performance of your makeup products to their full potential  For Beauty tips, tricks & more visitÂ\\xa0https://bigbasket.blog/'},\n",
+       "  {'product': 'Hair Roller - Medium 20 mm',\n",
+       "   'brand': 'Daiou',\n",
+       "   'sale_price': 300.0,\n",
+       "   'rating': 4.0,\n",
+       "   'description': 'For Hair stylers  For Beauty tips, tricks & more visitÃ‚\\xa0https://bigbasket.blog/'},\n",
+       "  {'product': 'Balm - Ultra Power',\n",
+       "   'brand': 'Zandu',\n",
+       "   'sale_price': 42.0,\n",
+       "   'rating': 4.4,\n",
+       "   'description': 'Ayurvedic Proprietary Medicine For External Use Only  For Beauty tips, tricks & more visitÃƒâ€šÃ‚Â\\xa0https://bigbasket.blog/'},\n",
+       "  {'product': 'Hair Spray',\n",
+       "   'brand': 'Novagold',\n",
+       "   'sale_price': 1200.0,\n",
+       "   'rating': 1.0,\n",
+       "   'description': 'Info not in English  For Beauty tips, tricks & more visitÃƒ€š\\xa0https://bigbasket.blog/'}]}"
+      ]
+     },
+     "execution_count": 125,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "res"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import yaml\n",
+    "# Read YAML file\n",
+    "with open(\"config.yaml\", 'r') as stream:\n",
+    "    data_loaded = yaml.safe_load(stream)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'COLLECTION_NAME': 'big-basket-products-all', 'API_KEY': ''}"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data_loaded"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "test",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}