import os
from typing import List, Optional

import gradio as gr
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain.chains import RetrievalQA
from langchain.llms.base import LLM
from groq import Groq
import fitz  # PyMuPDF; extract_text_from_pdf below relies on fitz.open()
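
# Pipeline overview: an uploaded file is converted to plain text, split into
# overlapping chunks, embedded with a sentence-transformers model, and indexed
# in a Chroma vector store. At question time, RetrievalQA fetches the most
# similar chunks and passes them as context to the Groq-hosted Llama 3 model.
#
# The Groq API key is read from the GROQ_API_KEY environment variable; set it
# as a secret in your deployment environment rather than hardcoding it here.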
# --- Custom LLM class using Groq ---
class GroqLLM(LLM):
    model: str = "llama3-8b-8192"
    # Never commit a real key to source control; read it from the environment.
    api_key: str = os.environ.get("GROQ_API_KEY", "")
    temperature: float = 0.0

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str:
        client = Groq(api_key=self.api_key)
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ]
        response = client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=self.temperature,
        )
        return response.choices[0].message.content

    @property
    def _llm_type(self) -> str:
        return "groq-llm"
# --- RAG Setup ---
retriever = None
qa_chain = None
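
# PDF extraction uses PyMuPDF (imported as `fitz`): page.get_text() returns
# each page's plain text, which is concatenated into one string. Note that
# scanned/image-only PDFs yield empty text here, since no OCR step is performed.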
def extract_text_from_pdf(file_path: str) -> str:
    doc = fitz.open(file_path)
    text = ""
    for page in doc:
        text += page.get_text()
    doc.close()
    return text
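
# Ingestion: chunk_size=500 with chunk_overlap=50 keeps each chunk small enough
# to pack several into one prompt, while the overlap preserves context across
# chunk boundaries. as_retriever() uses LangChain's default similarity search,
# which returns the 4 most similar chunks per query.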
def process_file(file_obj):
    global retriever, qa_chain
    # gr.File may supply a tempfile-like object (with .name) or a plain path
    # string depending on the Gradio version; handle both.
    file_path = file_obj.name if hasattr(file_obj, "name") else file_obj
    ext = os.path.splitext(file_path)[1].lower()
    try:
        # Load content
        if ext == ".pdf":
            text = extract_text_from_pdf(file_path)
        elif ext == ".txt":
            with open(file_path, "r", encoding="utf-8") as f:
                text = f.read()
        else:
            return "❌ Unsupported file format. Please upload a .txt or .pdf file."

        # Create document chunks
        document = Document(page_content=text)
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        docs = splitter.split_documents([document])

        # Vectorstore with HuggingFace embeddings
        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vectorstore = Chroma.from_documents(docs, embedding=embeddings, persist_directory="rag_store")
        retriever = vectorstore.as_retriever()

        qa_chain = RetrievalQA.from_chain_type(
            llm=GroqLLM(),
            retriever=retriever,
            return_source_documents=True,
        )
        return "✅ File processed successfully. You can now ask questions."
    except Exception as e:
        return f"❌ Error processing file: {e}"
def ask_question(query):
    if qa_chain is None:
        return "⚠ Please upload a file first."
    result = qa_chain({"query": query})
    return result["result"]
# --- Gradio UI ---
with gr.Blocks(title="RAG PDF & Text Chatbot") as demo:
    gr.Markdown("## 🧠 RAG-powered Q&A Chatbot (Groq + LangChain)")
    gr.Markdown("Upload a .pdf or .txt file and ask questions based on its content.")

    file_input = gr.File(label="Upload PDF or Text File", file_types=[".pdf", ".txt"])
    upload_status = gr.Textbox(label="Status", interactive=False)
    file_input.change(fn=process_file, inputs=file_input, outputs=upload_status)

    question_box = gr.Textbox(label="Ask your question")
    answer_box = gr.Textbox(label="Answer", interactive=False)
    submit_btn = gr.Button("Get Answer")
    submit_btn.click(fn=ask_question, inputs=question_box, outputs=answer_box)

demo.launch()
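
# To run locally, export the key first (example value is a placeholder):
#   export GROQ_API_KEY="gsk_..."
#   python app.py
# On Hugging Face Spaces, add GROQ_API_KEY as a repository secret in Settings.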