File size: 5,525 Bytes
560a395
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96bca4f
560a395
96bca4f
560a395
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import os
import streamlit as st
from sentence_transformers import SentenceTransformer, util
from groq import Groq
from PyPDF2 import PdfReader
from docx import Document
from pptx import Presentation

# Dark-theme stylesheet injected into the page as raw HTML.
# It defines the .title, .subheader, .input-area, .about-app and .footer
# classes and restyles the text-input widget.
_PAGE_CSS = """
    <style>
        body {
            background-color: #121212;
            color: #ffffff;
            font-family: Arial, sans-serif;
        }
        .title {
            font-size: 36px;
            font-weight: bold;
            color: #e67e22;
            text-align: center;
            margin-bottom: 20px;
        }
        .subheader {
            font-size: 24px;
            color: #f39c12;
            margin-top: 10px;
            text-align: center;
        }
        .input-area {
            color: #ecf0f1;
            font-size: 16px;
        }
        .about-app {
            margin-top: 20px;
            padding: 15px;
            background-color: #1e1e1e;
            border-radius: 8px;
            color: #bdc3c7;
        }
        .footer {
            background-color: #1c1c1c;
            color: #bdc3c7;
            font-size: 14px;
            text-align: center;
            padding: 10px;
            position: fixed;
            bottom: 0;
            left: 0;
            width: 100%;
            z-index: 1000;
        }
        .stTextInput > div > div > input {
            background-color: #2c3e50;
            color: #ecf0f1;
            font-size: 16px;
            border-radius: 5px;
            padding: 10px;
        }
    </style>
"""

# unsafe_allow_html is required so the <style> tag is emitted as markup
# instead of being escaped.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Initialize retriever and Groq client
@st.cache_resource
def _load_retriever():
    """Load the sentence-embedding model once per server process.

    Streamlit re-executes this script on every user interaction; caching the
    model avoids re-instantiating it on each rerun.
    """
    return SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


retriever = _load_retriever()

# generate_response() relies on this module-level `client`. Fail early with a
# readable message instead of an exception when the key is not configured.
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    st.error("GROQ_API_KEY is not set. Configure it in the environment to enable answer generation.")
    st.stop()

client = Groq(api_key=api_key)

# Knowledge base (documents) and embeddings.
# `documents` holds the raw passages; `document_embeddings` holds one
# embedding per passage, in the same order, so a corpus_id returned by the
# semantic search indexes directly into `documents`.
documents = [
    "Retrieval-Augmented Generation (RAG) is an AI framework that combines the strengths of retrieval-based and generative models.",
    "The main components of a RAG system are the retriever and the generator.",
    "A key benefit of Retrieval-Augmented Generation is that it can produce more accurate responses compared to standalone generative models.",
    "The retrieval process in a RAG system often relies on embedding-based models, like Sentence-BERT or DPR.",
    "Common use cases of RAG include chatbots, customer support systems, and knowledge retrieval for business intelligence."
]
document_embeddings = retriever.encode(documents, convert_to_tensor=True)

def retrieve(query, top_k=1):
    """Return the knowledge-base passage most similar to *query*.

    The query is embedded with the module-level ``retriever`` and compared
    against ``document_embeddings``. ``top_k`` bounds how many candidates the
    search considers, but only the best-ranked passage is returned.

    Returns:
        The best-matching string from ``documents``, or ``None`` when the
        search yields no hits.
    """
    encoded_query = retriever.encode(query, convert_to_tensor=True)
    # semantic_search returns one result list per query; there is one query.
    ranked = util.semantic_search(encoded_query, document_embeddings, top_k=top_k)[0]
    if not ranked:
        return None
    candidates = [documents[match['corpus_id']] for match in ranked]
    return candidates[0]

def generate_response(query, context):
    """Ask the chat model to answer *query* using *context*.

    Sends a single user message that places the context and the question in
    one prompt, using the module-level ``client``.

    Returns:
        The text content of the first completion choice.
    """
    prompt = f"Context: {context} Question: {query} Answer:"
    chat_messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        messages=chat_messages,
        model="gemma2-9b-it",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# Page header: rendered as raw HTML so the .title CSS class applies.
_TITLE_HTML = '<div class="title">DocumentsReader</div>'
st.markdown(_TITLE_HTML, unsafe_allow_html=True)

# Collapsible "About App" panel with feature list and developer links.
_ABOUT_TEXT = """
    ### About the App: Document-Based RAG Question Answering
    This application, developed by **Hamaad Ayub Khan**, combines state-of-the-art **Retrieval-Augmented Generation (RAG)** technology with powerful AI models to answer questions based on the content of uploaded documents.
    **Key Features:**
    - Advanced Retrieval System
    - Generative Answering Capability
    - Multi-format Document Support
    - Seamless Knowledge Base Update
    - Contextually Rich Answers
    **Developer Information:** Hamaad Ayub Khan created this application with a commitment to making information retrieval simple, accurate, and accessible.
    **Social Links:**  
    - [GitHub](https://github.com/hakgs1234)  
    - [LinkedIn](https://linkedin.com/in/hamaadayubkhan)  
    """
about_panel = st.expander("About App")
with about_panel:
    st.write(_ABOUT_TEXT)

# Document upload and knowledge base update.
# The uploaded file's text is extracted according to its MIME type, appended
# to `documents` as a single passage, and the embeddings are recomputed so the
# new passage is searchable.
uploaded_file = st.file_uploader("Upload a document", type=["pdf", "docx", "pptx", "txt"])
if uploaded_file:
    # Stays None when the MIME type matches no known format, so the
    # unsupported case is reported instead of raising NameError below.
    file_text = None
    if uploaded_file.type == "application/pdf":
        # PdfReader exposes text per page (there is no reader-level
        # extract_text); a page without extractable text contributes "".
        file_text = "\n".join(
            (page.extract_text() or "") for page in PdfReader(uploaded_file).pages
        )
    elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        file_text = "\n".join([para.text for para in Document(uploaded_file).paragraphs])
    elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
        # Only shapes that expose a `text` attribute carry readable content.
        file_text = "\n".join([shape.text for slide in Presentation(uploaded_file).slides for shape in slide.shapes if hasattr(shape, "text")])
    elif uploaded_file.type == "text/plain":
        # Replace undecodable bytes rather than crashing on non-UTF-8 files.
        file_text = uploaded_file.read().decode("utf-8", errors="replace")

    if file_text is None:
        st.error(f"Unsupported file type: {uploaded_file.type or 'unknown'}")
    elif not file_text.strip():
        # Nothing to index (e.g. a PDF with no extractable text); skip it so
        # an empty passage is never embedded or returned as context.
        st.warning("No extractable text was found in the uploaded document.")
    else:
        documents.append(file_text)
        document_embeddings = retriever.encode(documents, convert_to_tensor=True)
        st.success("Document content successfully added to the knowledge base.")

# Question input and answer rendering
question = st.text_input("Enter your question:")

# Only query the knowledge base once the user has typed something.
# The answer is rendered after the input widget call.
if question:
    retrieved_context = retrieve(question)
    if retrieved_context:
        answer = generate_response(question, retrieved_context)
    else:
        # No passage was retrieved, so skip the model call entirely.
        answer = "I'm unable to find relevant information in the knowledge base."

    st.markdown("### Answer:")
    st.write(answer)