import os

from dotenv import load_dotenv
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
# Load environment variables (expects a .env file with a GROQ_API_KEY=<your key> line)
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise EnvironmentError("GROQ_API_KEY is not set; add it to a .env file or the environment.")
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

working_dir = os.path.dirname(os.path.abspath(__file__))
# Initialize the embedding model
# (HuggingFaceEmbeddings defaults to sentence-transformers/all-mpnet-base-v2)
embedding = HuggingFaceEmbeddings()

# Initialize the DeepSeek-R1 70B distill model via Groq
deepseek_llm = ChatGroq(
    model="deepseek-r1-distill-llama-70b",
    temperature=0,
)

# Initialize the Llama-3.3 70B model via Groq
llama3_llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0,
)
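# Optional helper: DeepSeek-R1 distill models typically wrap their chain of
# thought in <think>...</think> tags, so the raw answer string may contain
# reasoning text before the final answer. This sketch strips those blocks;
# the helper name is illustrative.
import re

def strip_think_tags(text):
    """Remove <think>...</think> reasoning blocks from a model response."""
    return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()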
def process_document_to_chromadb(file_name):
    """Process a PDF document and store its embeddings in ChromaDB."""
    loader = UnstructuredPDFLoader(os.path.join(working_dir, file_name))
    documents = loader.load()

    # Split the document into overlapping chunks for retrieval
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
    texts = text_splitter.split_documents(documents)

    # Embed the chunks and persist them to a local Chroma store
    vectordb = Chroma.from_documents(
        documents=texts,
        embedding=embedding,
        persist_directory=os.path.join(working_dir, "doc_vectorstore"),
    )
    return "Document successfully processed and stored."
def answer_question(user_question):
    """Retrieve answers from the stored document using DeepSeek-R1 and Llama-3."""
    vectordb = Chroma(
        persist_directory=os.path.join(working_dir, "doc_vectorstore"),
        embedding_function=embedding,
    )
    retriever = vectordb.as_retriever()

    # DeepSeek-R1 response
    qa_chain_deepseek = RetrievalQA.from_chain_type(
        llm=deepseek_llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )
    response_deepseek = qa_chain_deepseek.invoke({"query": user_question})
    answer_deepseek = response_deepseek["result"]

    # Llama-3 response
    qa_chain_llama3 = RetrievalQA.from_chain_type(
        llm=llama3_llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )
    response_llama3 = qa_chain_llama3.invoke({"query": user_question})
    answer_llama3 = response_llama3["result"]

    return {"answer_deepseek": answer_deepseek, "answer_llama3": answer_llama3}
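# Example usage: index a PDF, then query both models. The filename and the
# question below are placeholders; point them at a real PDF next to this
# script and a question about its contents.
if __name__ == "__main__":
    print(process_document_to_chromadb("sample.pdf"))
    answers = answer_question("What is this document about?")
    print("DeepSeek-R1:", strip_think_tags(answers["answer_deepseek"]))
    print("Llama-3:", answers["answer_llama3"])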