import os
import streamlit as st
from sentence_transformers import SentenceTransformer, util
from groq import Groq
from PyPDF2 import PdfReader
from docx import Document
from pptx import Presentation
# CSS styling for a professional look with black background
st.markdown("""
<style>
body {
background-color: #121212;
color: #ffffff;
font-family: Arial, sans-serif;
}
.title {
font-size: 36px;
font-weight: bold;
color: #e67e22;
text-align: center;
margin-bottom: 20px;
}
.subheader {
font-size: 24px;
color: #f39c12;
margin-top: 10px;
text-align: center;
}
.input-area {
color: #ecf0f1;
font-size: 16px;
}
.about-app {
margin-top: 20px;
padding: 15px;
background-color: #1e1e1e;
border-radius: 8px;
color: #bdc3c7;
}
.footer {
background-color: #1c1c1c;
color: #bdc3c7;
font-size: 14px;
text-align: center;
padding: 10px;
position: fixed;
bottom: 0;
left: 0;
width: 100%;
z-index: 1000;
}
.stTextInput > div > div > input {
background-color: #2c3e50;
color: #ecf0f1;
font-size: 16px;
border-radius: 5px;
padding: 10px;
}
</style>
""", unsafe_allow_html=True)
# Initialize retriever and Groq client
retriever = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# Read the Groq API key from the environment; `client` is required by generate_response below
api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=api_key)
# Knowledge base (documents) and embeddings
documents = [
    "Retrieval-Augmented Generation (RAG) is an AI framework that combines the strengths of retrieval-based and generative models.",
    "The main components of a RAG system are the retriever and the generator.",
    "A key benefit of Retrieval-Augmented Generation is that it can produce more accurate responses compared to standalone generative models.",
    "The retrieval process in a RAG system often relies on embedding-based models, like Sentence-BERT or DPR.",
    "Common use cases of RAG include chatbots, customer support systems, and knowledge retrieval for business intelligence."
]
document_embeddings = retriever.encode(documents, convert_to_tensor=True)
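# Illustrative note (not part of the app flow): util.semantic_search compares a query
# embedding against these corpus embeddings and returns, per query, a ranked list of
# {'corpus_id': ..., 'score': ...} dicts, where 'corpus_id' indexes into `documents`.
# For example:
#   hits = util.semantic_search(retriever.encode("What is RAG?", convert_to_tensor=True),
#                               document_embeddings, top_k=2)
#   best_match = documents[hits[0][0]['corpus_id']]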
def retrieve(query, top_k=1):
    # Embed the query and return the most relevant document, or None if there are no hits
    query_embedding = retriever.encode(query, convert_to_tensor=True)
    hits = util.semantic_search(query_embedding, document_embeddings, top_k=top_k)
    top_docs = [documents[hit['corpus_id']] for hit in hits[0]]
    return top_docs[0] if hits[0] else None
def generate_response(query, context):
    response = client.chat.completions.create(
        messages=[{
            "role": "user",
            "content": f"Context: {context} Question: {query} Answer:"
        }],
        model="gemma2-9b-it"
    )
    return response.choices[0].message.content
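# Example usage sketch (assumes GROQ_API_KEY is set; uses only the seed documents above):
#   ctx = retrieve("What are the main components of a RAG system?")
#   print(generate_response("What are the main components of a RAG system?", ctx))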
# Streamlit app layout
st.markdown('<div class="title">DocumentsReader</div>', unsafe_allow_html=True)
# About the App section
with st.expander("About App"):
    st.write("""
### About the App: Document-Based RAG Question Answering

This application, developed by **Hamaad Ayub Khan**, combines state-of-the-art **Retrieval-Augmented Generation (RAG)** technology with powerful AI models to answer questions based on the content of uploaded documents.

**Key Features:**
- Advanced Retrieval System
- Generative Answering Capability
- Multi-format Document Support
- Seamless Knowledge Base Update
- Contextually Rich Answers

**Developer Information:** Hamaad Ayub Khan created this application with a commitment to making information retrieval simple, accurate, and accessible.

**Social Links:**
- [GitHub](https://github.com/hakgs1234)
- [LinkedIn](https://linkedin.com/in/hamaadayubkhan)
""")
# Document upload and knowledge base update
uploaded_file = st.file_uploader("Upload a document", type=["pdf", "docx", "pptx", "txt"])
if uploaded_file:
    file_text = ""
    if uploaded_file.type == "application/pdf":
        # Extract text page by page; pages without extractable text yield None, so fall back to ""
        file_text = "\n".join([page.extract_text() or "" for page in PdfReader(uploaded_file).pages])
    elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        file_text = "\n".join([para.text for para in Document(uploaded_file).paragraphs])
    elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
        file_text = "\n".join([shape.text for slide in Presentation(uploaded_file).slides for shape in slide.shapes if hasattr(shape, "text")])
    elif uploaded_file.type == "text/plain":
        file_text = uploaded_file.read().decode("utf-8")
    documents.append(file_text)
    document_embeddings = retriever.encode(documents, convert_to_tensor=True)
    st.success("Document content successfully added to the knowledge base.")
# Question input and output handling
question = st.text_input("Enter your question:")
# If a question was entered, retrieve context, generate an answer, and display it below the input field
if question:
    retrieved_context = retrieve(question)
    answer = generate_response(question, retrieved_context) if retrieved_context else "I'm unable to find relevant information in the knowledge base."
    # Display the answer below the input field
    st.markdown("### Answer:")
    st.write(answer)