That1BrainCell committed on
Commit
a33fddf
·
verified ·
1 Parent(s): 28a52ae

Update embedding.py

Browse files
Files changed (1) hide show
  1. embedding.py +44 -35
embedding.py CHANGED
@@ -76,8 +76,8 @@ def feature_extraction(tag, history , context):
76
  Respond with the updated Tag_History.
77
  '''
78
 
79
- model = random.choice([gemini,gemini1,gemini2,gemini3])
80
- result = model.invoke(prompt)
81
 
82
  return result.content
83
 
@@ -180,22 +180,27 @@ def detailed_history(history):
180
  return details
181
 
182
 
183
- def get_embeddings(link):
184
 
185
  print(f"\nCreating Embeddings ----- {link}")
186
- history = {
187
- "Introduction": "",
188
- "Specifications": "",
189
- "Product Overview": "",
190
- "Safety Information": "",
191
- "Installation Instructions": "",
192
- "Setup and Configuration": "",
193
- "Operation Instructions": "",
194
- "Maintenance and Care": "",
195
- "Troubleshooting": "",
196
- "Warranty Information": "",
197
- "Legal Information": ""
198
- }
 
 
 
 
 
199
 
200
  # Extract Text -----------------------------
201
  print("Extracting Text")
@@ -206,25 +211,29 @@ def get_embeddings(link):
206
 
207
  # Create Chunks ----------------------------
208
  print("Writing Tag Data")
209
- chunks = text_splitter.create_documents(text)
210
-
211
- for chunk in chunks:
212
-
213
- with concurrent.futures.ThreadPoolExecutor() as executor:
214
- future_to_key = {
215
- executor.submit(
216
- feature_extraction, f"Product {key}", history[key], chunk.page_content
217
- ): key for key in history
218
- }
219
- for future in concurrent.futures.as_completed(future_to_key):
220
- key = future_to_key[future]
221
- try:
222
- response = future.result()
223
- history[key] = response
224
- except Exception as e:
225
- print(f"Error processing {key}: {e}")
 
 
 
 
 
226
 
227
- # history = detailed_history(history)
228
  print("Creating Vectors")
229
  genai_embeddings=[]
230
 
@@ -249,6 +258,6 @@ text_splitter = RecursiveCharacterTextSplitter(
249
  separators = ["",''," "]
250
  )
251
 
252
-
253
  if __name__ == '__main__':
 
254
  pass
 
76
  Respond with the updated Tag_History.
77
  '''
78
 
79
+ # model = random.choice([gemini,gemini1,gemini2,gemini3])
80
+ result = gemini1.invoke(prompt)
81
 
82
  return result.content
83
 
 
180
  return details
181
 
182
 
183
+ def get_embeddings(link,tag_option):
184
 
185
  print(f"\nCreating Embeddings ----- {link}")
186
+
187
+ if tag_option=='Single':
188
+ history = { "Details": "" }
189
+
190
+ else:
191
+ history = {
192
+ "Introduction": "",
193
+ "Specifications": "",
194
+ "Product Overview": "",
195
+ "Safety Information": "",
196
+ "Installation Instructions": "",
197
+ "Setup and Configuration": "",
198
+ "Operation Instructions": "",
199
+ "Maintenance and Care": "",
200
+ "Troubleshooting": "",
201
+ "Warranty Information": "",
202
+ "Legal Information": ""
203
+ }
204
 
205
  # Extract Text -----------------------------
206
  print("Extracting Text")
 
211
 
212
  # Create Chunks ----------------------------
213
  print("Writing Tag Data")
214
+
215
+ if tag_option=="Single":
216
+ history["Details"] = feature_extraction("Details", history["Details"], text[0][:50000])
217
+
218
+ else:
219
+ chunks = text_splitter.create_documents(text)
220
+
221
+ for chunk in chunks:
222
+
223
+ with concurrent.futures.ThreadPoolExecutor() as executor:
224
+ future_to_key = {
225
+ executor.submit(
226
+ feature_extraction, f"Product {key}", history[key], chunk.page_content
227
+ ): key for key in history
228
+ }
229
+ for future in concurrent.futures.as_completed(future_to_key):
230
+ key = future_to_key[future]
231
+ try:
232
+ response = future.result()
233
+ history[key] = response
234
+ except Exception as e:
235
+ print(f"Error processing {key}: {e}")
236
 
 
237
  print("Creating Vectors")
238
  genai_embeddings=[]
239
 
 
258
  separators = ["",''," "]
259
  )
260
 
 
261
  if __name__ == '__main__':
262
+ # print(get_embeddings('https://www.galaxys24manual.com/wp-content/uploads/pdf/galaxy-s24-manual-SAM-S921-S926-S928-OS14-011824-FINAL-US-English.pdf',"Single"))
263
  pass