rivapereira123 committed on
Commit
c150284
·
verified ·
1 Parent(s): 1132e0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -5
app.py CHANGED
@@ -222,7 +222,33 @@ class EnhancedGazaKnowledgeBase:
222
  self._load_vector_store()
223
  else:
224
  self._create_vector_store()
225
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  def _load_vector_store(self):
227
  """Load existing vector store with error handling"""
228
  try:
@@ -314,10 +340,7 @@ class EnhancedGazaKnowledgeBase:
314
  logger.info("Training IVF index...")
315
  # Get some embeddings for training
316
  sample_texts = [doc.text[:500] for doc in documents[:100]] # Sample for training
317
- sample_embeddings = np.array([
318
- self.embedding_model.get_text_embedding(text)
319
- for text in sample_texts
320
- ]).astype('float32')
321
  faiss_index.train(sample_embeddings)
322
  logger.info("IVF index training completed")
323
 
 
222
  self._load_vector_store()
223
  else:
224
  self._create_vector_store()
225
+
226
+
227
+
228
+ def _batch_embed_with_retry(self, texts, batch_size=16, max_retries=3, delay=2):
229
+ """Embed texts in batches with retry fallback and logging"""
230
+ embeddings = []
231
+ for i in range(0, len(texts), batch_size):
232
+ batch = texts[i:i+batch_size]
233
+ for attempt in range(max_retries):
234
+ try:
235
+ batch_embeddings = self.embedding_model.get_text_embedding_batch(batch)
236
+ embeddings.extend(batch_embeddings)
237
+ break # Success
238
+ except Exception as e:
239
+ if attempt < max_retries - 1:
240
+ logger.warning(f"Batch embedding failed (attempt {attempt+1}): {e}. Retrying...")
241
+ time.sleep(delay * (attempt + 1))
242
+ else:
243
+ logger.error(f"❌ Final failure embedding batch {i}-{i+len(batch)}: {e}")
244
+ # Optionally fall back to single embeddings
245
+ for text in batch:
246
+ try:
247
+ embeddings.append(self.embedding_model.get_text_embedding(text))
248
+ except Exception as sub_e:
249
+ logger.error(f"Failed to embed individual text: {sub_e}")
250
+ return embeddings
251
+
252
  def _load_vector_store(self):
253
  """Load existing vector store with error handling"""
254
  try:
 
340
  logger.info("Training IVF index...")
341
  # Get some embeddings for training
342
  sample_texts = [doc.text[:500] for doc in documents[:100]] # Sample for training
343
+ sample_embeddings = np.array(self._batch_embed_with_retry(sample_texts, batch_size=16)).astype('float32')
 
 
 
344
  faiss_index.train(sample_embeddings)
345
  logger.info("IVF index training completed")
346