DeepMostInnovations committed
Commit cd44c92 · verified · 1 Parent(s): 8504055

Upload Hindi embeddings model and all associated files

Files changed (2)
  1. hindi-rag-system.py +117 -137
  2. hindi-rag-system.py.amltmp +117 -137
hindi-rag-system.py CHANGED
@@ -744,35 +744,71 @@ def load_llama_model(model_name="unsloth/Llama-3.2-1B-Instruct", device="cuda"):
 
     return model, tokenizer
 
-def setup_qa_system(model, tokenizer, vector_store):
+# NEW FUNCTIONS FOR COMBINED RESULTS APPROACH
+
+def combine_top_results(results, query, max_results=4):
+    """
+    Combine the top search results into a single coherent context
+
+    Args:
+        results: List of (Document, score) tuples from retrieval
+        query: Original user query
+        max_results: Maximum number of results to combine
+
+    Returns:
+        String containing combined context from top results
+    """
+    # Sort results by score (highest first) and take top N
+    sorted_results = sorted(results, key=lambda x: x[1], reverse=True)[:max_results]
+
+    combined_texts = []
+    seen_content = set()  # To avoid duplicates
+
+    for doc, score in sorted_results:
+        # Extract relevant sentences to keep context focused
+        relevant_text = extract_relevant_sentences(doc.page_content, query, window_size=3)
+
+        # Skip if this exact text has been seen before
+        if relevant_text in seen_content:
+            continue
+
+        # Add source information to the text
+        source_name = os.path.basename(doc.metadata["source"])
+        text_with_source = f"{relevant_text} [Source: {source_name}]"
+
+        combined_texts.append(text_with_source)
+        seen_content.add(relevant_text)
+
+    # Combine all texts with clear separation
+    combined_context = "\n\n".join(combined_texts)
+
+    return combined_context
+
+def setup_enhanced_qa_system(model, tokenizer, vector_store):
     """
-    Set up a direct QA system using the model and retriever
+    Set up an enhanced QA system using the model and retriever with result combination
     """
     # Create retriever
     retriever = vector_store.as_retriever(
         search_type="similarity",
-        search_kwargs={"k": 3}
+        search_kwargs={"k": 6}  # Get more results than we'll use to filter better
     )
 
-    # Create a function to generate answers
-    def generate_answer(query):
-        # Retrieve documents
-        try:
-            docs = retriever.invoke(query)
-        except:
-            # Fallback to older method if invoke isn't available
-            docs = retriever.get_relevant_documents(query)
+    # Create a function to generate answers with combined context
+    def generate_enhanced_answer(query):
+        # Get raw documents and scores
+        docs = vector_store.similarity_search_with_score(query, k=6)
 
-        # Extract the content
-        context = "\n\n".join([doc.page_content for doc in docs])
+        # Combine the top results into a single context
+        combined_context = combine_top_results(docs, query, max_results=4)
 
-        # Create prompt
+        # Create prompt with the combined context
         prompt = f"""
         आपको निम्नलिखित संदर्भ से जानकारी के आधार पर एक प्रश्न का उत्तर देना है।
-        यदि आप उत्तर नहीं जानते हैं, तो बस "मुझे नहीं पता" कहें।
+        यदि आप उत्तर नहीं जानते हैं, तो बस "मुझे नहीं पता" कहें। अपने उत्तर में सभी प्रासंगिक जानकारी का उपयोग करें।
 
         संदर्भ:
-        {context}
+        {combined_context}
 
         प्रश्न: {query}
 
@@ -797,7 +833,7 @@ def setup_qa_system(model, tokenizer, vector_store):
                 do_sample=True
             )
         except Exception as e:
-            return f"Error generating response: {str(e)}"
+            return f"Error generating response: {str(e)}", None
 
         # Decode the generated text
         full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -805,9 +841,9 @@ def setup_qa_system(model, tokenizer, vector_store):
         # Extract just the answer part (after the prompt)
         answer = full_response.split("उत्तर:")[-1].strip()
 
-        return answer
+        return answer, combined_context
 
-    return generate_answer
+    return generate_enhanced_answer
 
 # Main RAG functions
 def index_text_files(model, tokenizer, data_dir, output_dir, device="cuda", chunk_size=500):
@@ -858,54 +894,10 @@ def query_text_corpus(model, tokenizer, vector_store_path, query, k=6, device="c
     # Perform similarity search
     results = perform_similarity_search(vector_store, query, k=k)
 
-    # Post-process results to combine adjacent chunks if they're from the same source
-    processed_results = []
-    seen_chunks = set()
-
-    for doc, score in results:
-        chunk_id = doc.metadata["chunk_id"]
-        source = doc.metadata["source"]
-
-        # Skip if we've already included this chunk
-        if (source, chunk_id) in seen_chunks:
-            continue
-
-        seen_chunks.add((source, chunk_id))
-
-        # Try to find adjacent chunks and combine them
-        combined_content = doc.page_content
-
-        # Look for adjacent chunks in results (both previous and next)
-        for adj_id in [chunk_id-1, chunk_id+1]:
-            for other_doc, _ in results:
-                if (other_doc.metadata["source"] == source and
-                    other_doc.metadata["chunk_id"] == adj_id and
-                    (source, adj_id) not in seen_chunks):
-
-                    # Add the adjacent chunk content
-                    if adj_id < chunk_id:  # Previous chunk
-                        combined_content = other_doc.page_content + " " + combined_content
-                    else:  # Next chunk
-                        combined_content = combined_content + " " + other_doc.page_content
-
-                    seen_chunks.add((source, adj_id))
-
-        # Create a new document with combined content
-        combined_doc = Document(
-            page_content=combined_content,
-            metadata={
-                "source": source,
-                "chunk_id": chunk_id,
-                "is_combined": True if combined_content != doc.page_content else False
-            }
-        )
-
-        processed_results.append((combined_doc, score))
-
-    return processed_results, vector_store
+    return results, vector_store
 
 def main():
-    parser = argparse.ArgumentParser(description="Hindi RAG System with LangChain and FAISS")
+    parser = argparse.ArgumentParser(description="Hindi RAG System with Combined Results")
     parser.add_argument("--model_dir", type=str, default="/home/ubuntu/output/hindi-embeddings-custom-tokenizer/final",
                         help="Directory containing the model and tokenizer")
     parser.add_argument("--tokenizer_dir", type=str, default="/home/ubuntu/hindi_tokenizer",
@@ -928,10 +920,12 @@ def main():
                         help="Run in interactive mode for querying")
     parser.add_argument("--reindex", action="store_true",
                         help="Force reindexing even if index exists")
-    parser.add_argument("--qa", action="store_true",
-                        help="Use LLM for question answering instead of just retrieval")
     parser.add_argument("--llm_name", type=str, default="unsloth/Llama-3.2-1B-Instruct",
                         help="HuggingFace model name for the LLM")
+    parser.add_argument("--show_context", action="store_true",
+                        help="Show the combined context sent to the LLM")
+    parser.add_argument("--show_raw_results", action="store_true",
+                        help="Show the raw search results before combination")
     args = parser.parse_args()
 
     # Load embedding model and tokenizer
@@ -943,20 +937,15 @@ def main():
     # Create vector store path
     vector_store_path = os.path.join(args.output_dir, "faiss_index")
 
-    # Load LLM if QA is enabled
-    llm_model = None
-    llm_tokenizer = None
-    qa_generator = None
-
-    if args.qa:
-        try:
-            # Load LLM
-            llm_model, llm_tokenizer = load_llama_model(args.llm_name, args.device)
-            print("LLM loaded successfully for QA")
-        except Exception as e:
-            print(f"Error loading LLM: {e}")
-            print("Falling back to retrieval-only mode")
-            args.qa = False
+    # Load LLM
+    try:
+        # Load LLM
+        llm_model, llm_tokenizer = load_llama_model(args.llm_name, args.device)
+        print("LLM loaded successfully for QA")
+    except Exception as e:
+        print(f"Error loading LLM: {e}")
+        print("Cannot proceed without LLM for this combined results approach")
+        return
 
     if args.index or args.reindex:
         # Index text files
@@ -964,47 +953,43 @@ def main():
             embed_model, embed_tokenizer, args.data_dir, args.output_dir, args.device, args.chunk_size
         )
         print(f"Indexing complete. Vector store saved to {vector_store_path}")
-
-        # Set up QA chain if enabled
-        if args.qa and llm_model is not None and llm_tokenizer is not None:
-            qa_generator = setup_qa_system(llm_model, llm_tokenizer, vector_store)
+
+    # Load vector store for querying
+    embeddings = HindiSentenceEmbeddings(embed_model, embed_tokenizer, device=args.device)
+    vector_store = load_vector_store(vector_store_path, embeddings)
+
+    # Set up enhanced QA system
+    qa_generator = setup_enhanced_qa_system(llm_model, llm_tokenizer, vector_store)
 
     if args.query:
-        # Query the corpus
-        results, vector_store = query_text_corpus(
-            embed_model, embed_tokenizer, vector_store_path, args.query, args.top_k, args.device
-        )
+        # Process the query with the enhanced system
+        print(f"\nProcessing query: {args.query}")
 
-        # Print retrieval results
-        print("\nSearch Results:")
-        for i, (doc, score) in enumerate(results):
-            print(f"\nResult {i+1} (Score: {score:.4f}):")
-            print(f"Source: {doc.metadata['source']}, Chunk: {doc.metadata['chunk_id']}")
+        # Show raw results if requested
+        if args.show_raw_results:
+            results, _ = query_text_corpus(
+                embed_model, embed_tokenizer, vector_store_path, args.query, args.top_k, args.device
+            )
 
-            # Extract and print only relevant sentences
-            relevant_text = extract_relevant_sentences(doc.page_content, args.query)
-            print(f"Content: {relevant_text}")
-
-        # If QA is enabled, also answer the question using the LLM
-        if args.qa and llm_model is not None and llm_tokenizer is not None:
-            if qa_generator is None:
-                qa_generator = setup_qa_system(llm_model, llm_tokenizer, vector_store)
+            print("\nRaw Search Results:")
+            for i, (doc, score) in enumerate(results):
+                print(f"\nResult {i+1} (Score: {score:.4f}):")
+                print(f"Source: {doc.metadata['source']}, Chunk: {doc.metadata['chunk_id']}")
+                print(f"Content: {doc.page_content[:200]}...")
+
+        # Generate enhanced answer
+        answer, context = qa_generator(args.query)
+
+        if args.show_context:
+            print("\nCombined Context:")
+            print(context)
 
-            # Get answer from QA chain
-            print("\nGenerating answer using LLM...")
-            try:
-                answer = qa_generator(args.query)
-                print("\nLLM Answer:")
-                print(answer)
-            except Exception as e:
-                print(f"Error generating answer: {e}")
+        print("\nEnhanced LLM Answer:")
+        print(answer)
 
     if args.interactive:
        print("\nInteractive mode. Enter queries (or type 'quit' to exit).")
 
-        # For the first query, load vector store
-        vector_store = None
-
         while True:
             print("\nEnter query:")
             query = input()
@@ -1015,33 +1000,28 @@ def main():
             if query.lower() == 'quit':
                 break
 
-            # Query the corpus
-            results, vector_store = query_text_corpus(
-                embed_model, embed_tokenizer, vector_store_path, query, args.top_k, args.device
-            )
-
-            # Print retrieval results
-            print("\nSearch Results:")
-            for i, (doc, score) in enumerate(results):
-                print(f"\nResult {i+1} (Score: {score:.4f}):")
-                print(f"Source: {doc.metadata['source']}, Chunk: {doc.metadata['chunk_id']}")
+            # Show raw results if requested
+            if args.show_raw_results:
+                results, _ = query_text_corpus(
+                    embed_model, embed_tokenizer, vector_store_path, query, args.top_k, args.device
+                )
 
-                # Extract and print only relevant sentences
-                relevant_text = extract_relevant_sentences(doc.page_content, query)
-                print(f"Content: {relevant_text}")
+                print("\nRaw Search Results:")
+                for i, (doc, score) in enumerate(results):
+                    print(f"\nResult {i+1} (Score: {score:.4f}):")
+                    print(f"Source: {doc.metadata['source']}, Chunk: {doc.metadata['chunk_id']}")
+                    print(f"Content: {doc.page_content[:200]}...")
+
+            # Process the query
+            print(f"\nProcessing query: {query}")
+            answer, context = qa_generator(query)
 
-            # If QA is enabled, also answer the question using the LLM
-            if args.qa and llm_model is not None and llm_tokenizer is not None:
-                if qa_generator is None:
-                    qa_generator = setup_qa_system(llm_model, llm_tokenizer, vector_store)
+            if args.show_context:
+                print("\nCombined Context:")
+                print(context)
 
-                print("\nGenerating answer using LLM...")
-                try:
-                    answer = qa_generator(query)
-                    print("\nLLM Answer:")
-                    print(answer)
-                except Exception as e:
-                    print(f"Error generating answer: {e}")
+            print("\nEnhanced LLM Answer:")
+            print(answer)
 
     # Clean up GPU memory
     if args.device == "cuda":
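For orientation, below is a small, self-contained toy version of the combination step this commit introduces: keep the highest-scoring hits, drop exact duplicates, tag each snippet with its source file, and join everything into one context block for the prompt. It sketches the idea only; the script's own combine_top_results() works on LangChain (Document, score) tuples and additionally trims each hit with extract_relevant_sentences() before combining, and the file names and scores below are made up for the example.

def combine_hits(hits, max_results=4):
    """hits: list of (text, source, score) tuples; a higher score is assumed to be better here."""
    # Keep only the best-scoring hits
    best = sorted(hits, key=lambda h: h[2], reverse=True)[:max_results]
    seen, parts = set(), []
    for text, source, _score in best:
        if text in seen:  # skip exact duplicates, as combine_top_results does
            continue
        seen.add(text)
        parts.append(f"{text} [Source: {source}]")
    # Join the kept snippets with clear separation, ready for the LLM prompt
    return "\n\n".join(parts)

if __name__ == "__main__":
    hits = [
        ("ताजमहल आगरा में स्थित है।", "monuments.txt", 0.91),
        ("ताजमहल आगरा में स्थित है।", "duplicate.txt", 0.88),  # exact duplicate, dropped
        ("इसे शाहजहाँ ने बनवाया था।", "history.txt", 0.75),
    ]
    print(combine_hits(hits))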
hindi-rag-system.py.amltmp CHANGED
(same changes as hindi-rag-system.py above)
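One detail worth noting when reusing the new closure: generate_enhanced_answer() returns an (answer, combined_context) pair, and on a generation failure it returns the error message together with None, so callers that unpack two values keep working. The stand-in below only mimics that return shape for illustration; stub_qa_generator is hypothetical and is not the script's implementation.

def stub_qa_generator(query):
    # Hypothetical stand-in for the closure returned by setup_enhanced_qa_system().
    # It mirrors only the return contract: (answer, combined_context) on success,
    # (error_message, None) on failure.
    if not query.strip():
        return "Error generating response: empty query", None
    return "मुझे नहीं पता", "…combined context…"

answer, context = stub_qa_generator("ताजमहल कहाँ स्थित है?")
if context is not None:  # the --show_context path in main() prints this
    print("\nCombined Context:")
    print(context)
print("\nEnhanced LLM Answer:")
print(answer)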