import os
from typing import List, Optional

import gradio as gr
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain.chains import RetrievalQA
from langchain.llms.base import LLM
from groq import Groq
import fitz  # PyMuPDF; used by extract_text_from_pdf below
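# Assumed dependency set for this app (not pinned in the original):
#   pip install gradio groq pymupdf chromadb sentence-transformers langchain langchain-community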
# --- Custom LLM class using Groq ---
class GroqLLM(LLM):
    """Minimal LangChain LLM wrapper around the Groq chat completions API."""

    model: str = "llama3-8b-8192"
    api_key: str = os.environ.get("GROQ_API_KEY", "")  # read from the environment; never hard-code keys
    temperature: float = 0.0

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        client = Groq(api_key=self.api_key)
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ]
        response = client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=self.temperature,
        )
        return response.choices[0].message.content

    @property
    def _llm_type(self) -> str:
        # LangChain's base LLM expects _llm_type to be a property.
        return "groq-llm"
# --- RAG Setup ---
retriever = None
qa_chain = None
def extract_text_from_pdf(file_path: str) -> str:
    """Extract plain text from every page of a PDF using PyMuPDF."""
    doc = fitz.open(file_path)
    text = ""
    for page in doc:
        text += page.get_text()
    doc.close()
    return text
def process_file(file_obj):
    """Load an uploaded .pdf/.txt file, chunk it, embed it, and build the QA chain."""
    global retriever, qa_chain
    ext = os.path.splitext(file_obj.name)[1].lower()
    try:
        # Load content
        if ext == ".pdf":
            text = extract_text_from_pdf(file_obj.name)
        elif ext == ".txt":
            with open(file_obj.name, "r", encoding="utf-8") as f:
                text = f.read()
        else:
            return "❌ Unsupported file format. Please upload a .txt or .pdf file."

        # Create document chunks
        document = Document(page_content=text)
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        docs = splitter.split_documents([document])

        # Vectorstore with HuggingFace embeddings
        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vectorstore = Chroma.from_documents(docs, embedding=embeddings, persist_directory="rag_store")
        retriever = vectorstore.as_retriever()

        qa_chain = RetrievalQA.from_chain_type(
            llm=GroqLLM(),
            retriever=retriever,
            return_source_documents=True,
        )
        return "✅ File processed successfully. You can now ask questions."
    except Exception as e:
        return f"❌ Error processing file: {e}"
def ask_question(query):
    """Answer a question against the indexed document via the RetrievalQA chain."""
    if qa_chain is None:
        return "❌ Please upload a file first."
    result = qa_chain.invoke({"query": query})  # invoke avoids the deprecated __call__ style
    return result["result"]
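# Surfacing sources (a sketch; relies on return_source_documents=True above):
#   result = qa_chain.invoke({"query": "What is this document about?"})
#   for src in result["source_documents"]:
#       print(src.page_content[:200])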
# --- Gradio UI ---
with gr.Blocks(title="RAG PDF & Text Chatbot") as demo:
    gr.Markdown("## 🧠 RAG-powered Q&A Chatbot (Groq + LangChain)")
    gr.Markdown("Upload a .pdf or .txt file and ask questions based on its content.")

    file_input = gr.File(label="Upload PDF or Text File", file_types=[".pdf", ".txt"])
    upload_status = gr.Textbox(label="Status", interactive=False)
    file_input.change(fn=process_file, inputs=file_input, outputs=upload_status)

    question_box = gr.Textbox(label="Ask your question")
    answer_box = gr.Textbox(label="Answer", interactive=False)
    submit_btn = gr.Button("Get Answer")
    submit_btn.click(fn=ask_question, inputs=question_box, outputs=answer_box)

demo.launch()