# RAG-based chatbot: upload a PDF, chunk and index it with FAISS embeddings,
# and answer questions about it with a Groq-hosted LLM via Streamlit.
import os

import faiss
import PyPDF2
import streamlit as st
from groq import Groq
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
# Initialize the Groq API client.
# SECURITY: the original hard-coded an API key in source — a committed key is
# leaked and must be rotated. Read it from the environment instead
# (export GROQ_API_KEY=... before launching the app).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
client = Groq(api_key=GROQ_API_KEY)
def extract_text_from_pdf(pdf_path):
    """Extract and concatenate the text of every page of a PDF.

    Args:
        pdf_path: Filesystem path to the PDF file.

    Returns:
        All page text concatenated into one string (may be "" if the
        document contains no extractable text).
    """
    text = ""
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        for page in reader.pages:
            # extract_text() can return None for image-only/empty pages;
            # coalesce to "" so the concatenation never raises TypeError.
            text += page.extract_text() or ""
    return text
def process_text_with_langchain(text):
    """Chunk raw text and build a FAISS vector index over the chunks.

    Args:
        text: The full document text to index.

    Returns:
        A ``(vectorstore, chunks)`` tuple: the FAISS store for similarity
        search and the list of chunk strings that were indexed.
    """
    # Overlapping 500-char windows keep sentence context across boundaries.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    pieces = splitter.split_text(text)
    # Embed each chunk and index the vectors for similarity search.
    store = FAISS.from_texts(pieces, HuggingFaceEmbeddings())
    return store, pieces
def query_faiss_index(query, vectorstore):
    """Return the text of the 3 indexed chunks most similar to *query*.

    Args:
        query: The user's question.
        vectorstore: An index exposing ``similarity_search(query, k=...)``.

    Returns:
        List of the matching chunks' ``page_content`` strings.
    """
    hits = vectorstore.similarity_search(query, k=3)
    return [hit.page_content for hit in hits]
def ask_groq(query):
    """Send *query* as a single user message to the Groq LLM.

    Args:
        query: Prompt text (here: retrieved context plus the question).

    Returns:
        The model's reply text from the first completion choice.
    """
    user_message = {"role": "user", "content": query}
    completion = client.chat.completions.create(
        messages=[user_message],
        model="llama3-8b-8192",
        stream=False,  # wait for the full reply rather than streaming tokens
    )
    return completion.choices[0].message.content
# --- Streamlit UI -----------------------------------------------------------
st.title("RAG-Based Chatbot")

uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
if uploaded_file is not None:
    # Persist the upload to disk so the path-based extractor can re-open it.
    with open("uploaded_file.pdf", "wb") as f:
        f.write(uploaded_file.read())

    st.info("Processing the PDF...")
    text = extract_text_from_pdf("uploaded_file.pdf")
    vectorstore, chunks = process_text_with_langchain(text)
    st.success("PDF processed and indexed successfully!")

    # Question flow lives inside the upload branch: the vectorstore only
    # exists once a PDF has been processed.
    query = st.text_input("Ask a question about the document")
    if query:
        st.info("Searching relevant chunks...")
        relevant_chunks = query_faiss_index(query, vectorstore)
        context = "\n".join(relevant_chunks)

        st.info("Getting response from the language model...")
        response = ask_groq(f"Context: {context}\n\nQuestion: {query}")
        st.success(response)