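"""RAG-based PDF chatbot (Streamlit app).

Extracts text from an uploaded PDF, splits it into chunks, embeds the
chunks into a FAISS index via LangChain, retrieves the chunks most
relevant to a user question, and asks a Groq-hosted Llama 3 model to
answer from that context.

Run with: streamlit run app.py
"""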
import os
import PyPDF2
import faiss  # imported explicitly so a missing FAISS install fails fast
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Note: on LangChain >= 0.2 the next two imports move to
# langchain_community.embeddings / langchain_community.vectorstores.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from groq import Groq
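# Assumed dependencies (pip names; versions not pinned here):
#   streamlit, PyPDF2, faiss-cpu, langchain, sentence-transformers, groq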
# Initialize the Groq client. Read the API key from the environment
# instead of hardcoding it (e.g. set GROQ_API_KEY in the host's secrets).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)
# Function to extract text from every page of a PDF
def extract_text_from_pdf(pdf_path):
    text = ""
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        for page in reader.pages:
            # extract_text() can return None for image-only pages
            text += page.extract_text() or ""
    return text
# Function to create chunks and embeddings using LangChain
def process_text_with_langchain(text):
    # Split text into overlapping chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500, chunk_overlap=50
    )
    chunks = text_splitter.split_text(text)
    # Create embeddings and a FAISS index over the chunks
    embeddings = HuggingFaceEmbeddings()
    vectorstore = FAISS.from_texts(chunks, embeddings)
    return vectorstore, chunks
# Function to query the FAISS index for the most similar chunks
def query_faiss_index(query, vectorstore):
    docs = vectorstore.similarity_search(query, k=3)
    results = [doc.page_content for doc in docs]
    return results
# Function to interact with the Groq LLM
def ask_groq(query):
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": query,
            }
        ],
        model="llama3-8b-8192",
        stream=False,
    )
    return chat_completion.choices[0].message.content
# Streamlit app
st.title("RAG-Based Chatbot")

uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
if uploaded_file is not None:
    # Persist the upload to disk so PyPDF2 can read it
    with open("uploaded_file.pdf", "wb") as f:
        f.write(uploaded_file.read())

    st.info("Processing the PDF...")
    text = extract_text_from_pdf("uploaded_file.pdf")
    vectorstore, chunks = process_text_with_langchain(text)
    st.success("PDF processed and indexed successfully!")

    query = st.text_input("Ask a question about the document")
    if query:
        st.info("Searching relevant chunks...")
        relevant_chunks = query_faiss_index(query, vectorstore)
        context = "\n".join(relevant_chunks)
        st.info("Getting response from the language model...")
        response = ask_groq(f"Context: {context}\n\nQuestion: {query}")
        st.success(response)