# Import necessary modules
import re

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Load the pre-trained SentenceTransformer model once so it can be reused
# for both indexing and searching, instead of reloading it on every call.
_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


# Clean text function
def clean_text(text):
    """Clean text by removing unnecessary symbols and collapsing whitespace."""
    text = re.sub(r"\s+", " ", text)  # Replace runs of whitespace with a single space
    # Keep Arabic letters, English letters, digits, and common punctuation
    text = re.sub(r"[^ء-يa-zA-Z0-9.,!?؛:\-\(\)\n ]+", "", text)
    return text.strip()


# Create FAISS index
def create_faiss_index(texts):
    """Create a FAISS index from the provided list of texts."""
    # Clean the texts before indexing
    texts = [clean_text(t) for t in texts]

    # Encode the texts into dense vectors (FAISS expects float32 arrays)
    embeddings = _MODEL.encode(texts)
    embeddings = np.asarray(embeddings, dtype="float32")

    # Create a flat L2 index and add the embeddings
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)
    return index, texts


# Search the FAISS index
def search_faiss(faiss_index, stored_texts, query, top_k=3):
    """Search FAISS for the texts most relevant to the query."""
    query_embedding = _MODEL.encode([query])
    query_embedding = np.asarray(query_embedding, dtype="float32")

    distances, indices = faiss_index.search(query_embedding, top_k)
    # FAISS returns -1 for unfilled slots when fewer than top_k results exist,
    # so only keep indices that point to an actual stored text.
    results = [stored_texts[i] for i in indices[0] if 0 <= i < len(stored_texts)]
    return results
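

# Example usage: a minimal sketch showing how the two functions fit together.
# The sample documents and query below are illustrative placeholders, not part
# of the original code.
if __name__ == "__main__":
    sample_docs = [
        "FAISS is a library for efficient similarity search over dense vectors.",
        "Sentence transformers encode text into fixed-size embeddings.",
        "An L2 flat index compares the query against every stored vector.",
    ]

    # Build the index once, then reuse it for any number of queries
    index, stored = create_faiss_index(sample_docs)

    hits = search_faiss(index, stored, "How does similarity search work?", top_k=2)
    for hit in hits:
        print(hit)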