MRK4863 commited on
Commit
337af81
·
1 Parent(s): ded1ed9

first commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.csv filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ sys.path.append("..")
3
+
4
+ import os
5
+ import json
6
+ import time
7
+ import pandas as pd
8
+ from dotenv import load_dotenv
9
+ import streamlit as st
10
+ from streamlit_extras.mention import mention
11
+ import chromadb
12
+ from openai import OpenAI
13
+ import json
14
+ # from langchain.vectorstores import Qdrant
15
+ from langchain_community.embeddings.openai import OpenAIEmbeddings
16
+ # from langchain.llms import OpenAI
17
+ from langchain_community.vectorstores import Chroma
18
+ from langchain.chains import RetrievalQAWithSourcesChain
19
+ from langchain.memory import ConversationSummaryMemory
20
+ from langchain_community.chat_models import ChatOpenAI
21
+ from langchain.prompts import PromptTemplate
22
+ import yaml
23
+ # Read YAML file
24
+ with open("config.yaml", 'r') as stream:
25
+ CONFIG = yaml.safe_load(stream)
26
+
27
+ # Number of records to retrieve
28
+ K=4
29
+
30
+ #############################################################################################################
31
+ #############################################################################################################
32
+
33
+ # Prompt template used to wrap the user's question and the conversation summary
34
+ # @st.cache_resource(show_spinner=False)
35
def PROMPT():
    """Return the prompt template for the product-recommendation assistant.

    The template exposes two placeholders, ``chat_history`` and ``question``,
    which the caller fills in through ``PromptTemplate.format``.

    Returns:
        PromptTemplate: Template declaring ``chat_history`` and ``question``
        as its input variables.
    """
    placeholders = ["chat_history", "question"]

    # NOTE(review): the instruction text is sent to the model as-is, so its
    # wording (including spelling) is deliberately left untouched here.
    instructions = '''
    You are a Product Recommendation Agent who gets his context from the retrieved descriptions of the products that matches best with the User's query.
    User is a human who, as a customer, wants to buy a product from this application.

    Given below is the summary of conversation between you (AI) and the user (Human):
    Context: {chat_history}

    Now use this summary of previous conversations and the retrieved descriptions of products to answer the following question asked by the user:
    Question: {question}

    Note:
    - Give your answer in a compreshenive manner in enumerated format.
    - Do not generate any information on your own, striclty stick to the provided data.
    - Also, do not repeat the information that is already present in the context.
    - If, you feel there is redundant information (or) an product is being described twice, specify that as well in the response.
    - The tone of the answer should be like a polite and friendly AI Assistant.
    '''

    return PromptTemplate(input_variables=placeholders, template=instructions)
57
+
58
+ # Load the LLM model for inference
59
@st.cache_resource(show_spinner=False)
def load_model():
    """Create the chat model used for answering and for summarising memory.

    Credentials and endpoint are read from ``CONFIG`` (the same source the
    retriever uses) instead of being hard-coded in this module.

    Returns:
        ChatOpenAI | None: The configured chat model, or ``None`` when
        construction fails (the error is shown in the Streamlit UI).
    """
    # NOTE(review): because of st.cache_resource the ``None`` fallback is
    # presumably cached too, so a failed start-up is not retried on rerun —
    # confirm whether that is intended.
    try:
        return ChatOpenAI(
            model='meta-llama/Meta-Llama-3.1-70B-Instruct',
            api_key=CONFIG["API_KEY"],
            base_url=CONFIG["BASE_URL"],
            max_tokens=10000,
        )
    except Exception as e:  # UI boundary: report and degrade instead of crashing
        st.error(e)
        return None
74
+
75
+ llm = load_model()
76
+
77
+ # Memory to store the conversation history
78
def memory():
    """Return the per-session conversation memory, creating it on first use.

    The memory object is kept in ``st.session_state`` under the key
    ``memory`` so that it survives Streamlit reruns of the script.

    Returns:
        ConversationSummaryMemory: The memory stored in the session state.
    """
    state = st.session_state
    if 'memory' in state:
        return state.memory

    # First call in this session: build the summary memory around the
    # module-level ``llm`` and remember it for later reruns.
    state.memory = ConversationSummaryMemory(
        llm=llm,
        memory_key="chat_history",
        return_messages=True,
        input_key="question",
        output_key='answer',
    )
    return state.memory
88
+
89
+ # Wrapper for DeepInfraEmbeddings generation
90
class DeepInfraEmbeddings:
    """Embedding wrapper exposing ``embed_documents`` / ``embed_query``.

    Embeddings are requested through an OpenAI-compatible client pointed at
    the given ``base_url``.
    """

    def __init__(self, api_key, base_url, model="BAAI/bge-large-en-v1.5"):
        """Initialise the client used to access the embedding model.

        Args:
            api_key (str): API key passed to the OpenAI-compatible client.
            base_url (str): Base URL of the embeddings endpoint.
            model (str, optional): Embedding model name.
                Defaults to "BAAI/bge-large-en-v1.5".
        """
        self.client = OpenAI(api_key=api_key, base_url=base_url)
        self.model = model

    def embed_documents(self, texts):
        """Convert the given texts to their embeddings.

        Args:
            texts (str | Iterable[str]): A single text or an iterable of
                texts. A bare string is treated as one document.

        Returns:
            list: One embedding per input text; an empty list when no texts
            are supplied (no request is made in that case).
        """
        if isinstance(texts, str):
            texts = [texts]
        else:
            # Materialise so generators/tuples are handled uniformly.
            texts = list(texts)

        # Nothing to embed: skip the remote call entirely.
        if not texts:
            return []

        response = self.client.embeddings.create(
            model=self.model,
            input=texts,
            encoding_format="float",
        )

        return [item.embedding for item in response.data]

    def embed_query(self, text):
        """Return the embedding of a single query string."""
        return self.embed_documents([text])[0]
124
+
125
+ # Retriever to retrieve the products from the database
126
+ # @st.cache_resource(show_spinner=False)
127
def retriever(K):
    """Build a retriever over the persisted Chroma collection.

    Args:
        K (int): Number of documents requested per query (passed as the
            ``k`` search keyword argument).

    Returns:
        The retriever produced by ``Chroma.as_retriever``.
    """
    # Both the Chroma client and the vector store point at the same folder
    # under the current working directory.
    store_dir = os.path.join(os.getcwd(), 'vector_stores')

    chroma_client = chromadb.PersistentClient(path=store_dir)

    embedder = DeepInfraEmbeddings(
        api_key=CONFIG["API_KEY"],
        base_url=CONFIG["BASE_URL"],
    )

    store = Chroma(
        collection_name=CONFIG["COLLECTION_NAME"],
        embedding_function=embedder,
        client=chroma_client,
        persist_directory=store_dir,
    )

    return store.as_retriever(search_kwargs={'k': K})
145
+
146
+ # Build the QA chain that ties the retriever to the conversation memory
147
def Chain():
    """Assemble the retrieval QA chain used to answer user questions.

    The chain combines the module-level ``llm``, a retriever configured with
    the current module-level ``K`` and the per-session memory, and is set up
    to return the source documents along with the answer.

    Returns:
        The chain created by ``RetrievalQAWithSourcesChain.from_chain_type``.
    """
    # ``K`` is only read here, so the module-level value is used directly.
    chain_settings = {
        "llm": llm,
        "chain_type": "stuff",
        "retriever": retriever(K),
        "memory": memory(),
        "return_source_documents": True,
    }
    return RetrievalQAWithSourcesChain.from_chain_type(**chain_settings)
158
+
159
+ # Search function to search for the products
160
+ # @st.cache_data(show_spinner=False)
161
def search(_chain, user_question):
    """Run the chain on the user's question wrapped in the prompt template.

    Args:
        _chain: Callable chain invoked with the formatted prompt.
        user_question: Text typed by the user.

    Returns:
        The chain's result, or ``None`` if the chain call raised (the error
        is shown in the Streamlit UI).
    """
    template = PROMPT()
    history = memory().load_memory_variables({})['chat_history']
    # The first entry of the loaded history carries the summary text.
    gen_prompt = template.format(
        question=user_question,
        chat_history=history[0].content,
    )

    try:
        return _chain(gen_prompt)
    except Exception as e:
        st.error(e)
    return None
169
+
170
+ #############################################################################################################
171
+ #############################################################################################################
172
+
173
+ # Initialize the app
174
def init():
    """Configure the Streamlit page and draw the static parts of the UI.

    Sets the page config, renders the sidebar slider that updates the
    module-level ``K``, and writes the page header.
    """
    global K

    page_settings = {
        "page_title": "BigBasket Products",
        "page_icon": "🧺",
        "layout": "centered",
        "initial_sidebar_state": "expanded",
    }
    st.set_page_config(**page_settings)

    with st.sidebar:
        st.subheader('Parameters')
        # The slider starts at the current K and overwrites it with the
        # value selected by the user.
        K = st.slider(
            label='K',
            min_value=1,
            max_value=10,
            value=K,
            help='Sets max number of products \nthat can be retrieved',
        )

    st.header('BigBasket Products', divider=True)
190
+
191
+ # Display the retrieved products
192
def display_data(res):
    """Render the retrieved products as a table.

    Args:
        res: Chain result; ``res['source_documents']`` is expected to hold
            documents whose ``page_content`` is a JSON-encoded product row.

    Returns:
        None. Errors while decoding the documents are reported in the UI and
        nothing is rendered; an empty result shows an informational note.
    """
    shown_columns = ['product', 'brand', 'sale_price', 'rating', 'description']

    try:
        srcs = [json.loads(row.page_content) for row in res['source_documents']]
        df = pd.DataFrame(srcs)
    except Exception as e:  # UI boundary: report instead of crashing the page
        st.error(e)
        return

    if df.empty:
        st.info("No matching products were retrieved.")
        return

    # reindex (rather than df[[...]]) tolerates rows that lack one of the
    # expected fields: the missing column is simply shown empty.
    df1 = df.reindex(columns=shown_columns).drop_duplicates()

    st.dataframe(
        df1,
        column_config={
            "product": st.column_config.Column(
                "Product Name",
                width="medium",
            ),
            "brand": st.column_config.Column(
                "Brand",
                width="medium",
            ),
            # No max_value: prices in the data set go well beyond 1000.
            "sale_price": st.column_config.NumberColumn(
                "Sale Price",
                help="The price of the product in INR",
                min_value=0,
                format="₹%.2f",
            ),
            "rating": st.column_config.NumberColumn(
                "Rating",
                help="Rating of the product",
                format="%.1f ⭐",
            ),
            "description": "Description",
        },
        hide_index=True,
    )
233
+
234
def main():
    """Run the chat UI: show the history, answer the pending user message.

    On every Streamlit rerun the stored messages are redrawn. If the last
    stored message is from the user (a new question, or one whose previous
    attempt failed), the chain is queried with that message and the answer is
    appended to the history and rendered together with the product table.
    """
    init()

    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = [
            {"role": "assistant", "content": "Hello 👋\n\n I am here to help you choose the product that you wanna buy!"}
        ]

    chain = Chain()

    # Prompt for user input and save it to the chat history
    if prompt := st.chat_input("Say something"):
        st.session_state.messages.append({"role": "user", "content": prompt})

    # Display the prior chat messages
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"], unsafe_allow_html=False)

    last_message = st.session_state.messages[-1]
    if last_message["role"] == "assistant":
        # Nothing pending: the assistant already answered.
        return

    # Use the stored message rather than ``prompt``: on reruns not triggered
    # by the chat box (e.g. moving the slider) ``prompt`` is None while the
    # last user question is still unanswered.
    question = last_message["content"]

    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            start_time = time.time()
            res = search(chain, question)
            elapsed = time.time() - start_time

        if res is None:
            st.error("Something went wrong. Please try again.")
            return

        # Only announce completion once the search actually succeeded.
        st.toast(f'Search completed in :green[{elapsed:.2f}] seconds', icon='✅')

        answer = res['answer']
        # Add response to message history
        st.session_state.messages.append({"role": "assistant", "content": answer})

        # Display assistant response in chat message container
        message_placeholder = st.empty()
        full_response = ""

        # Simulate a stream of the response with a short delay per word
        for chunk in answer.split():
            full_response += chunk + " "
            time.sleep(0.05)
            # Add a blinking cursor to simulate typing
            message_placeholder.markdown(full_response + "▌", unsafe_allow_html=False)
        message_placeholder.markdown(full_response, unsafe_allow_html=False)

        # Display product details
        display_data(res)
284
+
285
+ if __name__ == "__main__":
286
+ main()
config.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ COLLECTION_NAME: "big-basket-products-all"
2
+ API_KEY: "7E4hdDQrPP9mLi52rX4zCkJ2rFKIadOk"
3
+ DATA_PATH: './data/bigBasketProducts.csv'
4
+ BASE_URL: "https://api.deepinfra.com/v1/openai"
create_vectorStore.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ sys.path.append("..")
3
+ import os.path
4
+ import pandas as pd
5
+ import time
6
+ from tqdm import tqdm
7
+ import chromadb
8
+ from langchain.vectorstores import Chroma
9
+ from langchain.embeddings.openai import OpenAIEmbeddings
10
+ from openai import OpenAI
11
+
12
+ from openai import OpenAI
13
+
14
+ import yaml
15
+ # Read YAML file
16
+ with open("config.yaml", 'r') as stream:
17
+ CONFIG = yaml.safe_load(stream)
18
+
19
+ # Wrapper for DeepInfraEmbeddings generation
20
class DeepInfraEmbeddings:
    """Embedding wrapper exposing ``embed_documents`` / ``embed_query``.

    Embeddings are requested through an OpenAI-compatible client pointed at
    the given ``base_url``.
    """

    def __init__(self, api_key, base_url, model="BAAI/bge-large-en-v1.5"):
        """Initialise the client used to access the embedding model.

        Args:
            api_key (str): API key passed to the OpenAI-compatible client.
            base_url (str): Base URL of the embeddings endpoint.
            model (str, optional): Embedding model name.
                Defaults to "BAAI/bge-large-en-v1.5".
        """
        self.client = OpenAI(api_key=api_key, base_url=base_url)
        self.model = model

    def embed_documents(self, texts):
        """Convert the given texts to their embeddings.

        Args:
            texts (str | Iterable[str]): A single text or an iterable of
                texts. A bare string is treated as one document.

        Returns:
            list: One embedding per input text; an empty list when no texts
            are supplied (no request is made in that case).
        """
        if isinstance(texts, str):
            texts = [texts]
        else:
            # Materialise so generators/tuples are handled uniformly.
            texts = list(texts)

        # Nothing to embed: skip the remote call entirely.
        if not texts:
            return []

        response = self.client.embeddings.create(
            model=self.model,
            input=texts,
            encoding_format="float",
        )

        return [item.embedding for item in response.data]

    def embed_query(self, text):
        """Return the embedding of a single query string."""
        return self.embed_documents([text])[0]
54
+
55
+
56
+
57
# Create a local Chroma DB with persistent storage under ./vector_stores
STORE_DIR = os.path.join(os.getcwd(), 'vector_stores')
client = chromadb.PersistentClient(path=STORE_DIR)

# Load the product data
file_path = CONFIG["DATA_PATH"]
df = pd.read_csv(file_path)
# 'source' is the value of the first CSV column, 'row' the 0-based row number.
metadatas = [
    {'source': int(source), 'row': row}
    for row, source in enumerate(df.iloc[:, 0])
]
# Each document is the JSON serialisation of one CSV row.
docs = df.apply(lambda x: x.to_json(), axis=1).tolist()

# Initialize DeepInfraEmbeddings with the API key and base URL from the config
embeddings = DeepInfraEmbeddings(
    api_key=CONFIG["API_KEY"],
    base_url=CONFIG["BASE_URL"]
)

# Create Chroma collection
vector_store = Chroma(
    collection_name=CONFIG["COLLECTION_NAME"],
    embedding_function=embeddings,  # Pass the DeepInfraEmbeddings instance
    client=client,
    persist_directory=STORE_DIR
)

# Store the documents in the vector store chunk by chunk. A failing chunk is
# retried up to MAX_RETRIES times before it is skipped; reassigning the loop
# variable of a ``for ... in range(...)`` loop would not repeat the chunk.
retries_dict = {}  # chunk start index -> number of failed attempts
CHUNK_SIZE = 32
MAX_RETRIES = 5
for start in tqdm(range(0, len(docs), CHUNK_SIZE)):
    chunk_docs = docs[start:start + CHUNK_SIZE]
    chunk_metadatas = metadatas[start:start + CHUNK_SIZE]
    # The last chunk may be shorter than CHUNK_SIZE, so size the ids to it.
    chunk_ids = [str(x) for x in range(start, start + len(chunk_docs))]

    for _attempt in range(MAX_RETRIES):
        try:
            vector_store.add_texts(
                texts=chunk_docs,
                metadatas=chunk_metadatas,
                ids=chunk_ids
            )
            break
        except Exception as e:  # best-effort ingestion: log, wait, retry
            print(start, e)
            retries_dict[start] = retries_dict.get(start, 0) + 1
            time.sleep(1)
    else:
        print(f"Failed to add documents at index {start} after {MAX_RETRIES} retries. Skipping...")
99
+
data/.~lock.bigBasketProducts.csv# ADDED
@@ -0,0 +1 @@
 
 
1
+ ,rupesh,rupeshDesktop,20.08.2024 21:45,file:///home/rupesh/snap/libreoffice/324/.config/libreoffice/4;
data/bigBasketProducts.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b41689c4755f195f60c7a3b00cd3abbc7b04e53834fe7adc0dab582a74189ad
3
+ size 16739247
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ langchain==0.0.337
3
+ pandas==2.0.3
4
+ python-dotenv==1.0.0
5
+ streamlit==1.28.2
6
+ streamlit_extras==0.3.5
7
+ tqdm==4.65.0
8
+ uvicorn==0.24.0.post1
trial_2.ipynb ADDED
@@ -0,0 +1,663 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import sys\n",
10
+ "sys.path.append(\"..\")\n",
11
+ "import os.path\n",
12
+ "import pandas as pd\n",
13
+ "import time\n",
14
+ "from tqdm import tqdm\n",
15
+ "import chromadb\n",
16
+ "from openai import OpenAI\n",
17
+ "import json\n",
18
+ "\n",
19
+ "from langchain.vectorstores import Chroma\n",
20
+ "from langchain.embeddings.openai import OpenAIEmbeddings\n",
21
+ "from langchain.chains import RetrievalQAWithSourcesChain\n",
22
+ "from langchain.embeddings.openai import OpenAIEmbeddings\n",
23
+ "from langchain.vectorstores import Qdrant\n",
24
+ "from langchain.chat_models import ChatOpenAI\n",
25
+ "from langchain.prompts import PromptTemplate\n",
26
+ "from langchain.memory import ConversationSummaryMemory, ConversationBufferMemory"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": 4,
32
+ "metadata": {},
33
+ "outputs": [],
34
+ "source": [
35
+ "class DeepInfraEmbeddings:\n",
36
+ " def __init__(self, api_key, base_url, model=\"BAAI/bge-base-en-v1.5\"):\n",
37
+ " self.client = OpenAI(api_key=api_key, base_url=base_url)\n",
38
+ " self.model = model\n",
39
+ "\n",
40
+ " def embed_documents(self, texts):\n",
41
+ " if isinstance(texts, str):\n",
42
+ " texts = [texts]\n",
43
+ "\n",
44
+ " embeddings = self.client.embeddings.create(\n",
45
+ " model=self.model,\n",
46
+ " input=texts,\n",
47
+ " encoding_format=\"float\"\n",
48
+ " )\n",
49
+ "\n",
50
+ " return [embedding.embedding for embedding in embeddings.data]\n",
51
+ "\n",
52
+ " def embed_query(self, text):\n",
53
+ " return self.embed_documents([text])[0]"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": 5,
59
+ "metadata": {},
60
+ "outputs": [
61
+ {
62
+ "name": "stderr",
63
+ "output_type": "stream",
64
+ "text": [
65
+ "/home/rupesh/miniconda3/envs/test/lib/python3.12/site-packages/langchain_core/_api/deprecation.py:141: LangChainDeprecationWarning: The class `Chroma` was deprecated in LangChain 0.2.9 and will be removed in 0.4. An updated version of the class exists in the langchain-chroma package and should be used instead. To use it run `pip install -U langchain-chroma` and import as `from langchain_chroma import Chroma`.\n",
66
+ " warn_deprecated(\n"
67
+ ]
68
+ }
69
+ ],
70
+ "source": [
71
+ "COLLECTION_NAME = \"big-basket-products-all\"\n",
72
+ "\n",
73
+ "# Create Chroma client\n",
74
+ "# client = chromadb.Client()\n",
75
+ "client = chromadb.PersistentClient(path=os.path.join(os.getcwd(), 'vector_stores'))\n",
76
+ "\n",
77
+ "# Load data\n",
78
+ "file_path = os.path.join('./data/bigBasketProducts.csv')\n",
79
+ "df = pd.read_csv(file_path)\n",
80
+ "# df = df[:1000]\n",
81
+ "metadatas = [{'source': int(df.loc[i][0]), 'row': i} for i in range(len(df))]\n",
82
+ "docs = df.apply(lambda x: x.to_json(), axis=1).tolist()\n",
83
+ "\n",
84
+ "# Initialize DeepInfraEmbeddings with your API key and base URL\n",
85
+ "embeddings = DeepInfraEmbeddings(\n",
86
+ " api_key=\"7E4hdDQrPP9mLi52rX4zCkJ2rFKIadOk\",\n",
87
+ " base_url=\"https://api.deepinfra.com/v1/openai\"\n",
88
+ ")\n",
89
+ "\n",
90
+ "# Create Chroma collection\n",
91
+ "vector_store = Chroma(\n",
92
+ " collection_name=COLLECTION_NAME,\n",
93
+ " embedding_function=embeddings, # Pass the DeepInfraEmbeddings instance\n",
94
+ " client=client,\n",
95
+ " persist_directory = os.path.join(os.getcwd(), 'vector_stores')\n",
96
+ ")"
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": 6,
102
+ "metadata": {},
103
+ "outputs": [],
104
+ "source": [
105
+ "retriever = vector_store.as_retriever(search_kwargs={\"k\": 5})"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": 7,
111
+ "metadata": {},
112
+ "outputs": [
113
+ {
114
+ "name": "stderr",
115
+ "output_type": "stream",
116
+ "text": [
117
+ "/home/rupesh/miniconda3/envs/test/lib/python3.12/site-packages/langchain_core/_api/deprecation.py:141: LangChainDeprecationWarning: The method `BaseRetriever.get_relevant_documents` was deprecated in langchain-core 0.1.46 and will be removed in 0.3.0. Use invoke instead.\n",
118
+ " warn_deprecated(\n"
119
+ ]
120
+ }
121
+ ],
122
+ "source": [
123
+ "docs = retriever.get_relevant_documents(\"what is skin care?\")"
124
+ ]
125
+ },
126
+ {
127
+ "cell_type": "code",
128
+ "execution_count": 8,
129
+ "metadata": {},
130
+ "outputs": [
131
+ {
132
+ "name": "stdout",
133
+ "output_type": "stream",
134
+ "text": [
135
+ "id None\n",
136
+ "metadata {'row': 20544, 'source': 20545}\n",
137
+ "page_content {\"index\":20545,\"product\":\"Vitamin E Face Wash\",\"category\":\"Beauty & Hygiene\",\"sub_category\":\"Skin Care\",\"brand\":\"INATUR \",\"sale_price\":315.0,\"market_price\":450.0,\"type\":\"Face Care\",\"rating\":null,\"description\":\"Inatur Vitamin E Face Cleanser is a mild and creamy formulation that removes dirt, impurities, and make-up gently. Being rich in anti-oxidants, it is effective in preserving the moisture balance of the skin. It leaves the skin nourished and hydrated making it look, soft, clean & healthy.\"}\n",
138
+ "type Document\n",
139
+ "id None\n",
140
+ "metadata {'row': 8225, 'source': 8226}\n",
141
+ "page_content {\"index\":8226,\"product\":\"Face Wash - Oily Skin\",\"category\":\"Beauty & Hygiene\",\"sub_category\":\"Men's Grooming\",\"brand\":\"USTRAA\",\"sale_price\":194.0,\"market_price\":199.0,\"type\":\"Face & Body\",\"rating\":3.0,\"description\":\"This face wash with basil and lime extracts gives a younger, fresher and oil-free appearance. This face wash checks acne and controls oil on the face with the help of salicylic acid. If you have an oily skin, then this is the skin care you need. This product is completely paraben and sulphate free.\"}\n",
142
+ "type Document\n",
143
+ "id None\n",
144
+ "metadata {'row': 16313, 'source': 16314}\n",
145
+ "page_content {\"index\":16314,\"product\":\"Face Wash - Oily Skin\",\"category\":\"Beauty & Hygiene\",\"sub_category\":\"Skin Care\",\"brand\":\"USTRAA\",\"sale_price\":194.0,\"market_price\":199.0,\"type\":\"Face & Body\",\"rating\":3.0,\"description\":\"This face wash with basil and lime extracts gives a younger, fresher and oil-free appearance. This face wash checks acne and controls oil on the face with the help of salicylic acid. If you have an oily skin, then this is the skin care you need. This product is completely paraben and sulphate free.\"}\n",
146
+ "type Document\n",
147
+ "id None\n",
148
+ "metadata {'row': 7248, 'source': 7249}\n",
149
+ "page_content {\"index\":7249,\"product\":\"Active Range Radiance Face Elixir Serum\",\"category\":\"Beauty & Hygiene\",\"sub_category\":\"Skin Care\",\"brand\":\"Organic Harvest\",\"sale_price\":1695.75,\"market_price\":1995.0,\"type\":\"Face Care\",\"rating\":null,\"description\":\"A light weight organic beauty fluid for face brightening and anti ageing. It is a special formulation of organic ingredients that help the skin retain its youthful appearance. The beauty serum is a unique blend of oils and organic ingredients formulated to help skin look young and healthy. Aimed to give a glowing skin and to provide deep nourishment, relieve dark circles and repairs pigmentation specially formulated to help fight the signs of skin ageing.\"}\n",
150
+ "type Document\n",
151
+ "id None\n",
152
+ "metadata {'row': 4102, 'source': 4103}\n",
153
+ "page_content {\"index\":4103,\"product\":\"Party Glow Facial Kit\",\"category\":\"Beauty & Hygiene\",\"sub_category\":\"Skin Care\",\"brand\":\"Vlcc\",\"sale_price\":167.05,\"market_price\":257.0,\"type\":\"Face Care\",\"rating\":4.1,\"description\":\"A revolutionary, 6 Step Facial System that helps you get that Facial Glow at the convenience of your home. It is a Do It Yourself Facial Kit, which allows you to get your facial done, all by yourself. It comes in the form of a Kit which combines all the steps. It helps in sloughing off dead skin cells. It also hydrates the skin and maintains its oil balance. This facial kit helps achieve a blemish-free, radiant complexion. It targets the skin areas for dullness and dehydration, making the skin soft and beautiful.Tip: Following a regular skin care regime can help you achieve flawless skin. For more tips on skin care, visit bigbasket lifestyle blog, Click Here to visit bigbasket\\u2019s lifestyle blog\"}\n",
154
+ "type Document\n"
155
+ ]
156
+ }
157
+ ],
158
+ "source": [
159
+ "for doc in docs:\n",
160
+ " for k, v in doc:\n",
161
+ " print(k, v)"
162
+ ]
163
+ },
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": 19,
167
+ "metadata": {},
168
+ "outputs": [],
169
+ "source": [
170
+ "\n",
171
+ "class NeuralSearcher:\n",
172
+ "\n",
173
+ " def __init__(self, collection_name: str):\n",
174
+ " self.client = chromadb.PersistentClient(path=os.path.join(os.getcwd(), 'vector_stores'))\n",
175
+ " \n",
176
+ " self.embeddings = DeepInfraEmbeddings(\n",
177
+ " api_key=\"7E4hdDQrPP9mLi52rX4zCkJ2rFKIadOk\",\n",
178
+ " base_url=\"https://api.deepinfra.com/v1/openai\"\n",
179
+ " )\n",
180
+ " self.vector_store = Chroma(\n",
181
+ " collection_name=COLLECTION_NAME,\n",
182
+ " embedding_function=self.embeddings, # Pass the DeepInfraEmbeddings instance\n",
183
+ " client=self.client,\n",
184
+ " persist_directory = os.path.join(os.getcwd(), 'vector_stores')\n",
185
+ " )\n",
186
+ " \n",
187
+ " self.llm = ChatOpenAI(\n",
188
+ " model='meta-llama/Meta-Llama-3.1-70B-Instruct',\n",
189
+ " api_key=\"7E4hdDQrPP9mLi52rX4zCkJ2rFKIadOk\",\n",
190
+ " base_url=\"https://api.deepinfra.com/v1/openai\",\n",
191
+ " max_tokens = 70000\n",
192
+ " )\n",
193
+ " \n",
194
+ " self.memory = ConversationSummaryMemory(\n",
195
+ " llm=self.llm,\n",
196
+ " memory_key=\"chat_history\",\n",
197
+ " return_messages=True,\n",
198
+ " input_key=\"question\",\n",
199
+ " output_key='answer'\n",
200
+ " )\n",
201
+ " \n",
202
+ " prompt_template = '''\n",
203
+ " About: You are a Product Recommendation Agent who gets his context from the retrieved descriptions of the products that matches best with the User's query. \n",
204
+ " User is a human who, as a customer, wants to buy a product from this application.\n",
205
+ "\n",
206
+ " Given below is the summary of conversation between you (AI) and the user (Human):\n",
207
+ " Context: {chat_history}\n",
208
+ "\n",
209
+ " Now use this summary of previous conversations and the retrieved descriptions of products to answer the following question asked by the user:\n",
210
+ " Question: {question}\n",
211
+ "\n",
212
+ " Note: \n",
213
+ " - Give your answer in a compreshenive manner in enumerated format.\n",
214
+ " - Do not generate any information on your own, striclty stick to the provided data. \n",
215
+ " - Also, do not repeat the information that is already present in the context.\n",
216
+ " - If, you feel there is redundant information (or) an product is being described twice, specify that as well in the response.\n",
217
+ " - The tone of the answer should be like a polite and friendly AI Assistant.\n",
218
+ " '''\n",
219
+ " self.PROMPT = PromptTemplate(\n",
220
+ " template=prompt_template, input_variables=[\"chat_history\", \"question\"]\n",
221
+ " )\n",
222
+ "\n",
223
+ " def search(self, question: str, num_results: int, filter_: dict = None) -> dict:\n",
224
+ " chain = RetrievalQAWithSourcesChain.from_chain_type(\n",
225
+ " llm=self.llm,\n",
226
+ " chain_type=\"stuff\",\n",
227
+ " retriever=self.vector_store.as_retriever(search_kwargs={'k':num_results}),\n",
228
+ " memory=self.memory,\n",
229
+ " return_source_documents=True,\n",
230
+ " )\n",
231
+ "\n",
232
+ " gen_prompt = self.PROMPT.format(question=question, chat_history=self.memory.load_memory_variables({})['chat_history'][0].content)\n",
233
+ " start_time = time.time()\n",
234
+ " res = chain(gen_prompt)\n",
235
+ " print(f\"Search took {time.time() - start_time} seconds\")\n",
236
+ "\n",
237
+ " ret = {}\n",
238
+ " ret['answer'] = res['answer']\n",
239
+ "\n",
240
+ " srcs = [json.loads(row.page_content) for row in res['source_documents']]\n",
241
+ "\n",
242
+ " df = pd.DataFrame(srcs)\n",
243
+ " df = df.fillna('null')\n",
244
+ " # df.set_index('product', inplace=True)\n",
245
+ "\n",
246
+ " # df1 = df[['product','brand', 'sale_price', 'rating', 'description', 'category', 'sub_category']]\n",
247
+ " df1 = df\n",
248
+ "\n",
249
+ " # Remove duplicates\n",
250
+ " df1 = df1.drop_duplicates()\n",
251
+ "\n",
252
+ " ret['products'] = df1.to_dict(orient='records')\n",
253
+ " return ret\n",
254
+ " \n",
255
+ " def check_memory_history(self):\n",
256
+ " return self.memory.load_memory_variables({})"
257
+ ]
258
+ },
259
+ {
260
+ "cell_type": "code",
261
+ "execution_count": 20,
262
+ "metadata": {},
263
+ "outputs": [],
264
+ "source": [
265
+ "neural_searcher = NeuralSearcher(collection_name=COLLECTION_NAME)"
266
+ ]
267
+ },
268
+ {
269
+ "cell_type": "code",
270
+ "execution_count": 24,
271
+ "metadata": {},
272
+ "outputs": [
273
+ {
274
+ "name": "stdout",
275
+ "output_type": "stream",
276
+ "text": [
277
+ "Search took 4.530710458755493 seconds\n"
278
+ ]
279
+ }
280
+ ],
281
+ "source": [
282
+ "q = \"Suggest me some top 5 Beverages products?\"\n",
283
+ "num_results = 5\n",
284
+ "res = neural_searcher.search(question=q, num_results=num_results)"
285
+ ]
286
+ },
287
+ {
288
+ "cell_type": "code",
289
+ "execution_count": 25,
290
+ "metadata": {},
291
+ "outputs": [
292
+ {
293
+ "name": "stdout",
294
+ "output_type": "stream",
295
+ "text": [
296
+ "I don't know.\n",
297
+ "\n",
298
+ "\n"
299
+ ]
300
+ }
301
+ ],
302
+ "source": [
303
+ "print(res['answer'])"
304
+ ]
305
+ },
306
+ {
307
+ "cell_type": "code",
308
+ "execution_count": 26,
309
+ "metadata": {},
310
+ "outputs": [
311
+ {
312
+ "data": {
313
+ "text/plain": [
314
+ "{'answer': \"I don't know.\\n\\n\",\n",
315
+ " 'products': [{'index': 14541,\n",
316
+ " 'product': 'Perfume - Ultra Sensual',\n",
317
+ " 'category': 'Beauty & Hygiene',\n",
318
+ " 'sub_category': \"Men's Grooming\",\n",
319
+ " 'brand': 'Wild Stone',\n",
320
+ " 'sale_price': 569.05,\n",
321
+ " 'market_price': 599.0,\n",
322
+ " 'type': \"Men's Deodorants\",\n",
323
+ " 'rating': 4.5,\n",
324
+ " 'description': 'A bright, fresh and energetic fragrance with a touch of Woody & Musky notes that give it an exciting twist For Beauty tips, tricks & more visitÂ\\xa0https://bigbasket.blog/'},\n",
325
+ " {'index': 24485,\n",
326
+ " 'product': 'Perfume - Ultra Sensual',\n",
327
+ " 'category': 'Beauty & Hygiene',\n",
328
+ " 'sub_category': 'Fragrances & Deos',\n",
329
+ " 'brand': 'Wild Stone',\n",
330
+ " 'sale_price': 569.05,\n",
331
+ " 'market_price': 599.0,\n",
332
+ " 'type': \"Men's Deodorants\",\n",
333
+ " 'rating': 4.5,\n",
334
+ " 'description': 'A bright, fresh and energetic fragrance with a touch of Woody & Musky notes that give it an exciting twist For Beauty tips, tricks & more visitÂ\\xa0https://bigbasket.blog/'},\n",
335
+ " {'index': 7382,\n",
336
+ " 'product': 'Be Tempted Eau De Parfum',\n",
337
+ " 'category': 'Beauty & Hygiene',\n",
338
+ " 'sub_category': 'Fragrances & Deos',\n",
339
+ " 'brand': 'Dkny',\n",
340
+ " 'sale_price': 3485.0,\n",
341
+ " 'market_price': 4100.0,\n",
342
+ " 'type': 'Eau De Parfum',\n",
343
+ " 'rating': 5.0,\n",
344
+ " 'description': 'Have you ever wondered if you should? \\xa0If you dare? \\xa0Give in to the temptation with DKNY Be Tempted, a provocative delectably addictive new fragrance that expresses an overt willing and wants for sensorial seduction.'},\n",
345
+ " {'index': 19717,\n",
346
+ " 'product': 'Perfume - Forest Spice',\n",
347
+ " 'category': 'Beauty & Hygiene',\n",
348
+ " 'sub_category': \"Men's Grooming\",\n",
349
+ " 'brand': 'Wild Stone',\n",
350
+ " 'sale_price': 359.4,\n",
351
+ " 'market_price': 599.0,\n",
352
+ " 'type': \"Men's Deodorants\",\n",
353
+ " 'rating': 3.0,\n",
354
+ " 'description': 'A bright, fresh and energetic fragrance with a touch of Woody & Oriental notes that give it an exciting twist For Beauty tips, tricks & more visitÂ\\xa0https://bigbasket.blog/'},\n",
355
+ " {'index': 2919,\n",
356
+ " 'product': 'Body Deodorant - Aqua Fresh',\n",
357
+ " 'category': 'Beauty & Hygiene',\n",
358
+ " 'sub_category': 'Fragrances & Deos',\n",
359
+ " 'brand': 'Wild Stone',\n",
360
+ " 'sale_price': 169.15,\n",
361
+ " 'market_price': 199.0,\n",
362
+ " 'type': \"Men's Deodorants\",\n",
363
+ " 'rating': 'null',\n",
364
+ " 'description': 'The ultimate range of irresistible masculine fragrances. Makes Its Happen For Beauty tips, tricks & more visitÂÂ\\xa0https://bigbasket.blog/'}]}"
365
+ ]
366
+ },
367
+ "execution_count": 26,
368
+ "metadata": {},
369
+ "output_type": "execute_result"
370
+ }
371
+ ],
372
+ "source": [
373
+ "res"
374
+ ]
375
+ },
376
+ {
377
+ "cell_type": "code",
378
+ "execution_count": 27,
379
+ "metadata": {},
380
+ "outputs": [],
381
+ "source": [
382
+ "mem = neural_searcher.check_memory_history()"
383
+ ]
384
+ },
385
+ {
386
+ "cell_type": "code",
387
+ "execution_count": 28,
388
+ "metadata": {},
389
+ "outputs": [
390
+ {
391
+ "name": "stdout",
392
+ "output_type": "stream",
393
+ "text": [
394
+ "[SystemMessage(content='The human is seeking product recommendations from the Product Recommendation Agent, specifically asking for the top 5 Kitchen, Garden & Pets products, and later asks for the top 5 Beverages products. The AI is unsure and responds with \"I don\\'t know\" to both requests.')]\n"
395
+ ]
396
+ }
397
+ ],
398
+ "source": [
399
+ "print(mem[\"chat_history\"])"
400
+ ]
401
+ },
402
+ {
403
+ "cell_type": "code",
404
+ "execution_count": 128,
405
+ "metadata": {},
406
+ "outputs": [
407
+ {
408
+ "data": {
409
+ "text/plain": [
410
+ "{'chat_history': [SystemMessage(content='Here is the new summary:\\n\\nThe human asks the AI for top 5 hair product suggestions. The AI provides a list of 5 hair products based on the retrieved descriptions, including a professional brush, hair roller, hair gel, and an Ayurvedic balm. The AI notes that two of the products, the Professional Brush and Professional Brush - Roller, seem to be similar, with the only difference being the addition of \"Roller\" in the latter. The human asks for more information about the first product, which is the Professional Brush. The AI provides additional details about the Professional Brush, including its category, sub-category, brand, price, type, rating, and description, and reiterates that it seems similar to the Professional Brush - Roller product. The human then asks if there are any other similar products, and the AI responds by mentioning the Professional Brush - Roller as a similar product, noting that it has almost identical specifications and description as the Professional Brush.')]}"
411
+ ]
412
+ },
413
+ "execution_count": 128,
414
+ "metadata": {},
415
+ "output_type": "execute_result"
416
+ }
417
+ ],
418
+ "source": [
419
+ "mem"
420
+ ]
421
+ },
422
+ {
423
+ "cell_type": "code",
424
+ "execution_count": 120,
425
+ "metadata": {},
426
+ "outputs": [
427
+ {
428
+ "name": "stdout",
429
+ "output_type": "stream",
430
+ "text": [
431
+ "Search took 26.596819162368774 seconds\n"
432
+ ]
433
+ }
434
+ ],
435
+ "source": [
436
+ "q = \"Tell me more about the first product\"\n",
437
+ "num_results = 5\n",
438
+ "res = neural_searcher.search(question=q, num_results=num_results)"
439
+ ]
440
+ },
441
+ {
442
+ "cell_type": "code",
443
+ "execution_count": 121,
444
+ "metadata": {},
445
+ "outputs": [
446
+ {
447
+ "name": "stdout",
448
+ "output_type": "stream",
449
+ "text": [
450
+ "I'm happy to help you with your question about the first product!\n",
451
+ "\n",
452
+ "Based on our previous conversation, the first product I mentioned was the \"Professional Brush\". Here are some additional details about this product:\n",
453
+ "\n",
454
+ "1. **Product Name**: Professional Brush\n",
455
+ "2. **Category**: Beauty & Hygiene\n",
456
+ "3. **Sub-Category**: Hair Care\n",
457
+ "4. **Brand**: Salon\n",
458
+ "5. **Sale Price**: ₹500.0\n",
459
+ "6. **Market Price**: ₹500.0\n",
460
+ "7. **Type**: Tools & Accessories\n",
461
+ "8. **Rating**: 5.0\n",
462
+ "9. **Description**: The best brushes will render application effortless and optimise the performance of your makeup products to their full potential.\n",
463
+ "\n",
464
+ "Please note that this product seems to be similar to the \"Professional Brush - Roller\" product, with the only difference being the addition of \"Roller\" in the latter. If you'd like to know more about the differences between these two products, I'd be happy to help!\n",
465
+ "\n",
466
+ "\n"
467
+ ]
468
+ }
469
+ ],
470
+ "source": [
471
+ "print(res['answer'])"
472
+ ]
473
+ },
474
+ {
475
+ "cell_type": "code",
476
+ "execution_count": 122,
477
+ "metadata": {},
478
+ "outputs": [
479
+ {
480
+ "data": {
481
+ "text/plain": [
482
+ "{'answer': 'I\\'m happy to help you with your question about the first product!\\n\\nBased on our previous conversation, the first product I mentioned was the \"Professional Brush\". Here are some additional details about this product:\\n\\n1. **Product Name**: Professional Brush\\n2. **Category**: Beauty & Hygiene\\n3. **Sub-Category**: Hair Care\\n4. **Brand**: Salon\\n5. **Sale Price**: ₹500.0\\n6. **Market Price**: ₹500.0\\n7. **Type**: Tools & Accessories\\n8. **Rating**: 5.0\\n9. **Description**: The best brushes will render application effortless and optimise the performance of your makeup products to their full potential.\\n\\nPlease note that this product seems to be similar to the \"Professional Brush - Roller\" product, with the only difference being the addition of \"Roller\" in the latter. If you\\'d like to know more about the differences between these two products, I\\'d be happy to help!\\n\\n',\n",
483
+ " 'products': [{'product': 'Professional Brush - Roller',\n",
484
+ " 'brand': 'Salon',\n",
485
+ " 'sale_price': 500.0,\n",
486
+ " 'rating': 4.0,\n",
487
+ " 'description': 'The bestÀš\\xa0brushesÀš\\xa0will render application effortless and optimise the performance of your makeup products to their full potential For Beauty tips, tricks & more visit\\xa0https://bigbasket.blog/'},\n",
488
+ " {'product': 'Professional Brush',\n",
489
+ " 'brand': 'Salon',\n",
490
+ " 'sale_price': 500.0,\n",
491
+ " 'rating': 5.0,\n",
492
+ " 'description': 'The bestÂÂ\\xa0brushesÂÂ\\xa0will render application effortless and optimise the performance of your makeup products to their full potential For Beauty tips, tricks & more visitÂ\\xa0https://bigbasket.blog/'},\n",
493
+ " {'product': 'Hair Roller - Medium 20 mm',\n",
494
+ " 'brand': 'Daiou',\n",
495
+ " 'sale_price': 300.0,\n",
496
+ " 'rating': 4.0,\n",
497
+ " 'description': 'For Hair stylers For Beauty tips, tricks & more visitÂ\\xa0https://bigbasket.blog/'},\n",
498
+ " {'product': 'Balm - Ultra Power',\n",
499
+ " 'brand': 'Zandu',\n",
500
+ " 'sale_price': 42.0,\n",
501
+ " 'rating': 4.4,\n",
502
+ " 'description': 'Ayurvedic Proprietary Medicine For External Use Only For Beauty tips, tricks & more visitÂÂÂ\\xa0https://bigbasket.blog/'},\n",
503
+ " {'product': 'Hair Care Kit - Anti Hair Fall',\n",
504
+ " 'brand': 'Mamaearth',\n",
505
+ " 'sale_price': 999.0,\n",
506
+ " 'rating': 2.8,\n",
507
+ " 'description': 'Hi! I am Mamaearths Anti Hair Fall Control Kit. I am full of natural and organic bio-actives which help promote hair fall control & hair regrowth. I follow a 4 step process from root to tip to ensure I fix all your hair issues. Use me regularly and you wont ever need another hair products.'}]}"
508
+ ]
509
+ },
510
+ "execution_count": 122,
511
+ "metadata": {},
512
+ "output_type": "execute_result"
513
+ }
514
+ ],
515
+ "source": [
516
+ "res"
517
+ ]
518
+ },
519
+ {
520
+ "cell_type": "code",
521
+ "execution_count": 123,
522
+ "metadata": {},
523
+ "outputs": [
524
+ {
525
+ "name": "stdout",
526
+ "output_type": "stream",
527
+ "text": [
528
+ "Search took 21.11023235321045 seconds\n"
529
+ ]
530
+ }
531
+ ],
532
+ "source": [
533
+ "q = \"Are there any other similar products?\"\n",
534
+ "num_results = 5\n",
535
+ "res = neural_searcher.search(question=q, num_results=num_results)"
536
+ ]
537
+ },
538
+ {
539
+ "cell_type": "code",
540
+ "execution_count": 124,
541
+ "metadata": {},
542
+ "outputs": [
543
+ {
544
+ "name": "stdout",
545
+ "output_type": "stream",
546
+ "text": [
547
+ "Based on the provided data, here are some similar products to the Professional Brush:\n",
548
+ "\n",
549
+ "1. **Professional Brush - Roller**: This product seems to be very similar to the Professional Brush, with the only difference being the addition of \"Roller\" in the name. The description and specifications are almost identical. (\n"
550
+ ]
551
+ }
552
+ ],
553
+ "source": [
554
+ "print(res['answer'])"
555
+ ]
556
+ },
557
+ {
558
+ "cell_type": "code",
559
+ "execution_count": 125,
560
+ "metadata": {},
561
+ "outputs": [
562
+ {
563
+ "data": {
564
+ "text/plain": [
565
+ "{'answer': 'Based on the provided data, here are some similar products to the Professional Brush:\\n\\n1. **Professional Brush - Roller**: This product seems to be very similar to the Professional Brush, with the only difference being the addition of \"Roller\" in the name. The description and specifications are almost identical. (',\n",
566
+ " 'products': [{'product': 'Professional Brush - Roller',\n",
567
+ " 'brand': 'Salon',\n",
568
+ " 'sale_price': 500.0,\n",
569
+ " 'rating': 4.0,\n",
570
+ " 'description': 'The bestÀš\\xa0brushesÀš\\xa0will render application effortless and optimise the performance of your makeup products to their full potential For Beauty tips, tricks & more visit\\xa0https://bigbasket.blog/'},\n",
571
+ " {'product': 'Professional Brush',\n",
572
+ " 'brand': 'Salon',\n",
573
+ " 'sale_price': 500.0,\n",
574
+ " 'rating': 5.0,\n",
575
+ " 'description': 'The bestÂÂ\\xa0brushesÂÂ\\xa0will render application effortless and optimise the performance of your makeup products to their full potential For Beauty tips, tricks & more visitÂ\\xa0https://bigbasket.blog/'},\n",
576
+ " {'product': 'Hair Roller - Medium 20 mm',\n",
577
+ " 'brand': 'Daiou',\n",
578
+ " 'sale_price': 300.0,\n",
579
+ " 'rating': 4.0,\n",
580
+ " 'description': 'For Hair stylers For Beauty tips, tricks & more visitÂ\\xa0https://bigbasket.blog/'},\n",
581
+ " {'product': 'Balm - Ultra Power',\n",
582
+ " 'brand': 'Zandu',\n",
583
+ " 'sale_price': 42.0,\n",
584
+ " 'rating': 4.4,\n",
585
+ " 'description': 'Ayurvedic Proprietary Medicine For External Use Only For Beauty tips, tricks & more visitÂÂÂ\\xa0https://bigbasket.blog/'},\n",
586
+ " {'product': 'Hair Spray',\n",
587
+ " 'brand': 'Novagold',\n",
588
+ " 'sale_price': 1200.0,\n",
589
+ " 'rating': 1.0,\n",
590
+ " 'description': 'Info not in English For Beauty tips, tricks & more visitÀš\\xa0https://bigbasket.blog/'}]}"
591
+ ]
592
+ },
593
+ "execution_count": 125,
594
+ "metadata": {},
595
+ "output_type": "execute_result"
596
+ }
597
+ ],
598
+ "source": [
599
+ "res"
600
+ ]
601
+ },
602
+ {
603
+ "cell_type": "code",
604
+ "execution_count": 5,
605
+ "metadata": {},
606
+ "outputs": [],
607
+ "source": [
608
+ "import yaml\n",
609
+ "# Read YAML file\n",
610
+ "with open(\"config.yaml\", 'r') as stream:\n",
611
+ " data_loaded = yaml.safe_load(stream)"
612
+ ]
613
+ },
614
+ {
615
+ "cell_type": "code",
616
+ "execution_count": 7,
617
+ "metadata": {},
618
+ "outputs": [
619
+ {
620
+ "data": {
621
+ "text/plain": [
622
+ "{'COLLECTION_NAME': 'big-basket-products-all', 'API_KEY': ''}"
623
+ ]
624
+ },
625
+ "execution_count": 7,
626
+ "metadata": {},
627
+ "output_type": "execute_result"
628
+ }
629
+ ],
630
+ "source": [
631
+ "data_loaded"
632
+ ]
633
+ },
634
+ {
635
+ "cell_type": "code",
636
+ "execution_count": null,
637
+ "metadata": {},
638
+ "outputs": [],
639
+ "source": []
640
+ }
641
+ ],
642
+ "metadata": {
643
+ "kernelspec": {
644
+ "display_name": "test",
645
+ "language": "python",
646
+ "name": "python3"
647
+ },
648
+ "language_info": {
649
+ "codemirror_mode": {
650
+ "name": "ipython",
651
+ "version": 3
652
+ },
653
+ "file_extension": ".py",
654
+ "mimetype": "text/x-python",
655
+ "name": "python",
656
+ "nbconvert_exporter": "python",
657
+ "pygments_lexer": "ipython3",
658
+ "version": "3.12.4"
659
+ }
660
+ },
661
+ "nbformat": 4,
662
+ "nbformat_minor": 2
663
+ }