from sentence_transformers import SentenceTransformer import faiss import json import numpy as np # Initialize the sentence transformer model model = SentenceTransformer('all-MiniLM-L6-v2') index_path = 'vector_indexNLP.faiss' metadata_path = 'metadataNLP.json' # Load FAISS index and metadata index = faiss.read_index(index_path) with open(metadata_path, 'r') as f: metadata = json.load(f) def convert_distance_to_similarity(distance): # Assuming the distances are non-negative, we can use a simple conversion: return 1 / (1 + distance)*100 def query_index(query, model, index, metadata, top_k=5): query_embedding = model.encode(query).reshape(1,-1).astype('float32') D, I = index.search(query_embedding, top_k) results = [] for i in range(top_k): doc_metadata = metadata[I[0, i]] similarity_score = convert_distance_to_similarity(D[0, i]) result = { "filename": doc_metadata["filename"], "page_num": doc_metadata["page_num"], "standardized_text": doc_metadata["standardized_text"], "question_text":doc_metadata["question_text"], "answerable_text":doc_metadata["answerable_text"], "score":similarity_score } results.append(result) return results query = "what is Rule-Based Machine Translation?" results = query_index(query, model, index, metadata) def create_answer_to_show(query, results): answer = f"Based on your query '{query}', the following relevant information was found:\n\n" for result in results: answer += "\n------------------------------------------------------------------------------------------------------------------\n" answer += f"Filename: {result['filename']}\n" answer += f"Page number: {result['page_num']}\n" answer += f"Related keywords: {result['question_text'][:100]}...\n" if result['answerable_text']!="": answer += f"Answer: {result['answerable_text'][:500]}\n" answer += f"Relevancy Score: {result['score']}\n" answer += "\nFor more detailed information, please refer to the respective original texts.\n\n\n" return answer answer = create_answer_to_show(query, results) print(answer)