Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -222,7 +222,33 @@ class EnhancedGazaKnowledgeBase: | |
| 222 | 
             
                        self._load_vector_store()
         | 
| 223 | 
             
                    else:
         | 
| 224 | 
             
                        self._create_vector_store()
         | 
| 225 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 226 | 
             
                def _load_vector_store(self):
         | 
| 227 | 
             
                    """Load existing vector store with error handling"""
         | 
| 228 | 
             
                    try:
         | 
| @@ -314,10 +340,7 @@ class EnhancedGazaKnowledgeBase: | |
| 314 | 
             
                        logger.info("Training IVF index...")
         | 
| 315 | 
             
                        # Get some embeddings for training
         | 
| 316 | 
             
                        sample_texts = [doc.text[:500] for doc in documents[:100]]  # Sample for training
         | 
| 317 | 
            -
                        sample_embeddings = np.array( | 
| 318 | 
            -
                            self.embedding_model.get_text_embedding(text) 
         | 
| 319 | 
            -
                            for text in sample_texts
         | 
| 320 | 
            -
                        ]).astype('float32')
         | 
| 321 | 
             
                        faiss_index.train(sample_embeddings)
         | 
| 322 | 
             
                        logger.info("IVF index training completed")
         | 
| 323 |  | 
|  | |
| 222 | 
             
                        self._load_vector_store()
         | 
| 223 | 
             
                    else:
         | 
| 224 | 
             
                        self._create_vector_store()
         | 
| 225 | 
            +
             | 
| 226 | 
            +
             | 
| 227 | 
            +
             | 
| 228 | 
            +
                def _batch_embed_with_retry(self, texts, batch_size=16, max_retries=3, delay=2):
         | 
| 229 | 
            +
                """Embed texts in batches with retry fallback and logging"""
         | 
| 230 | 
            +
                embeddings = []
         | 
| 231 | 
            +
                for i in range(0, len(texts), batch_size):
         | 
| 232 | 
            +
                    batch = texts[i:i+batch_size]
         | 
| 233 | 
            +
                    for attempt in range(max_retries):
         | 
| 234 | 
            +
                        try:
         | 
| 235 | 
            +
                            batch_embeddings = self.embedding_model.get_text_embedding_batch(batch)
         | 
| 236 | 
            +
                            embeddings.extend(batch_embeddings)
         | 
| 237 | 
            +
                            break  # Success
         | 
| 238 | 
            +
                        except Exception as e:
         | 
| 239 | 
            +
                            if attempt < max_retries - 1:
         | 
| 240 | 
            +
                                logger.warning(f"Batch embedding failed (attempt {attempt+1}): {e}. Retrying...")
         | 
| 241 | 
            +
                                time.sleep(delay * (attempt + 1))
         | 
| 242 | 
            +
                            else:
         | 
| 243 | 
            +
                                logger.error(f"❌ Final failure embedding batch {i}-{i+len(batch)}: {e}")
         | 
| 244 | 
            +
                                # Optionally fall back to single embeddings
         | 
| 245 | 
            +
                                for text in batch:
         | 
| 246 | 
            +
                                    try:
         | 
| 247 | 
            +
                                        embeddings.append(self.embedding_model.get_text_embedding(text))
         | 
| 248 | 
            +
                                    except Exception as sub_e:
         | 
| 249 | 
            +
                                        logger.error(f"Failed to embed individual text: {sub_e}")
         | 
| 250 | 
            +
                return embeddings
         | 
| 251 | 
            +
             | 
| 252 | 
             
                def _load_vector_store(self):
         | 
| 253 | 
             
                    """Load existing vector store with error handling"""
         | 
| 254 | 
             
                    try:
         | 
|  | |
| 340 | 
             
                        logger.info("Training IVF index...")
         | 
| 341 | 
             
                        # Get some embeddings for training
         | 
| 342 | 
             
                        sample_texts = [doc.text[:500] for doc in documents[:100]]  # Sample for training
         | 
| 343 | 
            +
                        sample_embeddings = np.array(self._batch_embed_with_retry(sample_texts, batch_size=16)).astype('float32')
         | 
|  | |
|  | |
|  | |
| 344 | 
             
                        faiss_index.train(sample_embeddings)
         | 
| 345 | 
             
                        logger.info("IVF index training completed")
         | 
| 346 |  |