Update embedding.py
Browse files- embedding.py +44 -35
embedding.py
CHANGED
@@ -76,8 +76,8 @@ def feature_extraction(tag, history , context):
|
|
76 |
Respond with the updated Tag_History.
|
77 |
'''
|
78 |
|
79 |
-
model = random.choice([gemini,gemini1,gemini2,gemini3])
|
80 |
-
result =
|
81 |
|
82 |
return result.content
|
83 |
|
@@ -180,22 +180,27 @@ def detailed_history(history):
|
|
180 |
return details
|
181 |
|
182 |
|
183 |
-
def get_embeddings(link):
|
184 |
|
185 |
print(f"\nCreating Embeddings ----- {link}")
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
|
|
|
|
|
|
|
|
|
|
199 |
|
200 |
# Extract Text -----------------------------
|
201 |
print("Extracting Text")
|
@@ -206,25 +211,29 @@ def get_embeddings(link):
|
|
206 |
|
207 |
# Create Chunks ----------------------------
|
208 |
print("Writing Tag Data")
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
|
|
|
|
|
|
|
|
|
|
226 |
|
227 |
-
# history = detailed_history(history)
|
228 |
print("Creating Vectors")
|
229 |
genai_embeddings=[]
|
230 |
|
@@ -249,6 +258,6 @@ text_splitter = RecursiveCharacterTextSplitter(
|
|
249 |
separators = ["",''," "]
|
250 |
)
|
251 |
|
252 |
-
|
253 |
if __name__ == '__main__':
|
|
|
254 |
pass
|
|
|
76 |
Respond with the updated Tag_History.
|
77 |
'''
|
78 |
|
79 |
+
# model = random.choice([gemini,gemini1,gemini2,gemini3])
|
80 |
+
result = gemini1.invoke(prompt)
|
81 |
|
82 |
return result.content
|
83 |
|
|
|
180 |
return details
|
181 |
|
182 |
|
183 |
+
def get_embeddings(link,tag_option):
|
184 |
|
185 |
print(f"\nCreating Embeddings ----- {link}")
|
186 |
+
|
187 |
+
if tag_option=='Single':
|
188 |
+
history = { "Details": "" }
|
189 |
+
|
190 |
+
else:
|
191 |
+
history = {
|
192 |
+
"Introduction": "",
|
193 |
+
"Specifications": "",
|
194 |
+
"Product Overview": "",
|
195 |
+
"Safety Information": "",
|
196 |
+
"Installation Instructions": "",
|
197 |
+
"Setup and Configuration": "",
|
198 |
+
"Operation Instructions": "",
|
199 |
+
"Maintenance and Care": "",
|
200 |
+
"Troubleshooting": "",
|
201 |
+
"Warranty Information": "",
|
202 |
+
"Legal Information": ""
|
203 |
+
}
|
204 |
|
205 |
# Extract Text -----------------------------
|
206 |
print("Extracting Text")
|
|
|
211 |
|
212 |
# Create Chunks ----------------------------
|
213 |
print("Writing Tag Data")
|
214 |
+
|
215 |
+
if tag_option=="Single":
|
216 |
+
history["Details"] = feature_extraction("Details", history["Details"], text[0][:50000])
|
217 |
+
|
218 |
+
else:
|
219 |
+
chunks = text_splitter.create_documents(text)
|
220 |
+
|
221 |
+
for chunk in chunks:
|
222 |
+
|
223 |
+
with concurrent.futures.ThreadPoolExecutor() as executor:
|
224 |
+
future_to_key = {
|
225 |
+
executor.submit(
|
226 |
+
feature_extraction, f"Product {key}", history[key], chunk.page_content
|
227 |
+
): key for key in history
|
228 |
+
}
|
229 |
+
for future in concurrent.futures.as_completed(future_to_key):
|
230 |
+
key = future_to_key[future]
|
231 |
+
try:
|
232 |
+
response = future.result()
|
233 |
+
history[key] = response
|
234 |
+
except Exception as e:
|
235 |
+
print(f"Error processing {key}: {e}")
|
236 |
|
|
|
237 |
print("Creating Vectors")
|
238 |
genai_embeddings=[]
|
239 |
|
|
|
258 |
separators = ["",''," "]
|
259 |
)
|
260 |
|
|
|
261 |
if __name__ == '__main__':
|
262 |
+
# print(get_embeddings('https://www.galaxys24manual.com/wp-content/uploads/pdf/galaxy-s24-manual-SAM-S921-S926-S928-OS14-011824-FINAL-US-English.pdf',"Single"))
|
263 |
pass
|