import os
from typing import List, Optional

import gradio as gr
import fitz  # PyMuPDF (the code below calls fitz.open, so PyMuPDF is the required import, not pypdf)
from groq import Groq
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain.chains import RetrievalQA
from langchain.llms.base import LLM


# --- Custom LLM class using Groq ---
class GroqLLM(LLM):
    model: str = "llama3-8b-8192"
    api_key: str = os.environ.get("GROQ_API_KEY", "")  # <-- set GROQ_API_KEY in your environment; never hardcode the key
    temperature: float = 0.0

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        client = Groq(api_key=self.api_key)
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ]
        response = client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=self.temperature,
        )
        return response.choices[0].message.content

    @property
    def _llm_type(self) -> str:
        return "groq-llm"


# --- RAG Setup ---
retriever = None
qa_chain = None


def extract_text_from_pdf(file_path: str) -> str:
    """Extract plain text from every page of a PDF using PyMuPDF."""
    doc = fitz.open(file_path)
    text = ""
    for page in doc:
        text += page.get_text()
    doc.close()
    return text


def process_file(file_obj):
    """Load an uploaded .pdf/.txt file, chunk it, embed it, and build the QA chain."""
    global retriever, qa_chain
    ext = os.path.splitext(file_obj.name)[1].lower()
    try:
        # Load content
        if ext == ".pdf":
            text = extract_text_from_pdf(file_obj.name)
        elif ext == ".txt":
            with open(file_obj.name, "r", encoding="utf-8") as f:
                text = f.read()
        else:
            return "❌ Unsupported file format. Please upload a .txt or .pdf file."

        # Create document chunks
        document = Document(page_content=text)
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        docs = splitter.split_documents([document])

        # Vector store with HuggingFace embeddings
        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vectorstore = Chroma.from_documents(docs, embedding=embeddings, persist_directory="rag_store")
        retriever = vectorstore.as_retriever()

        qa_chain = RetrievalQA.from_chain_type(
            llm=GroqLLM(),
            retriever=retriever,
            return_source_documents=True,
        )

        return "✅ File processed successfully. You can now ask questions."
    except Exception as e:
        return f"❌ Error processing file: {e}"


def ask_question(query):
    if qa_chain is None:
        return "⚠ Please upload a file first."
    result = qa_chain({"query": query})
    return result["result"]


# --- Gradio UI ---
with gr.Blocks(title="RAG PDF & Text Chatbot") as demo:
    gr.Markdown("## 🧠 RAG-powered Q&A Chatbot (Groq + LangChain)")
    gr.Markdown("Upload a .pdf or .txt file and ask questions based on its content.")

    file_input = gr.File(label="Upload PDF or Text File", file_types=[".pdf", ".txt"])
    upload_status = gr.Textbox(label="Status", interactive=False)
    file_input.change(fn=process_file, inputs=file_input, outputs=upload_status)

    question_box = gr.Textbox(label="Ask your question")
    answer_box = gr.Textbox(label="Answer", interactive=False)
    submit_btn = gr.Button("Get Answer")
    submit_btn.click(fn=ask_question, inputs=question_box, outputs=answer_box)

demo.launch()
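
# --- How to run (a sketch; the filename `app.py` and exact package set are
# assumptions about this stack, not confirmed by the source). Note PyMuPDF
# installs as `pymupdf` but is imported as `fitz`:
#
#   pip install gradio groq langchain langchain-community chromadb sentence-transformers pymupdf
#   export GROQ_API_KEY="gsk_..."   # read by the GroqLLM class above
#   python app.py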