import os
import streamlit as st
from sentence_transformers import SentenceTransformer, util
from groq import Groq
from PyPDF2 import PdfReader
from docx import Document
from pptx import Presentation
# CSS styling for a professional look with black background
st.markdown("""
<style>
body {
background-color: #121212;
color: #ffffff;
font-family: Arial, sans-serif;
}
.title {
font-size: 36px;
font-weight: bold;
color: #e67e22;
text-align: center;
margin-bottom: 20px;
}
.subheader {
font-size: 24px;
color: #f39c12;
margin-top: 10px;
text-align: center;
}
.input-area {
color: #ecf0f1;
font-size: 16px;
}
.about-app {
margin-top: 20px;
padding: 15px;
background-color: #1e1e1e;
border-radius: 8px;
color: #bdc3c7;
}
.footer {
background-color: #1c1c1c;
color: #bdc3c7;
font-size: 14px;
text-align: center;
padding: 10px;
position: fixed;
bottom: 0;
left: 0;
width: 100%;
z-index: 1000;
}
.stTextInput > div > div > input {
background-color: #2c3e50;
color: #ecf0f1;
font-size: 16px;
border-radius: 5px;
padding: 10px;
}
</style>
""", unsafe_allow_html=True)
# Initialize retriever and Groq client
retriever = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# Read the Groq API key from the environment; `client` is required by generate_response below
api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=api_key)
# Knowledge base (documents) and embeddings
documents = [
    "Retrieval-Augmented Generation (RAG) is an AI framework that combines the strengths of retrieval-based and generative models.",
    "The main components of a RAG system are the retriever and the generator.",
    "A key benefit of Retrieval-Augmented Generation is that it can produce more accurate responses compared to standalone generative models.",
    "The retrieval process in a RAG system often relies on embedding-based models, like Sentence-BERT or DPR.",
    "Common use cases of RAG include chatbots, customer support systems, and knowledge retrieval for business intelligence."
]
document_embeddings = retriever.encode(documents, convert_to_tensor=True)
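# Illustrative note (not part of the app flow): util.semantic_search compares a query
# embedding against these corpus embeddings and returns, per query, a ranked list of
# {'corpus_id': ..., 'score': ...} dicts, where 'corpus_id' indexes into `documents`.
# For example:
#   hits = util.semantic_search(retriever.encode("What is RAG?", convert_to_tensor=True),
#                               document_embeddings, top_k=2)
#   best_match = documents[hits[0][0]['corpus_id']]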
def retrieve(query, top_k=1):
    # Embed the query and return the most relevant document, or None if there are no hits
    query_embedding = retriever.encode(query, convert_to_tensor=True)
    hits = util.semantic_search(query_embedding, document_embeddings, top_k=top_k)
    top_docs = [documents[hit['corpus_id']] for hit in hits[0]]
    return top_docs[0] if hits[0] else None
def generate_response(query, context):
    response = client.chat.completions.create(
        messages=[{
            "role": "user",
            "content": f"Context: {context} Question: {query} Answer:"
        }],
        model="gemma2-9b-it"
    )
    return response.choices[0].message.content
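# Example usage sketch (assumes GROQ_API_KEY is set; uses only the seed documents above):
#   ctx = retrieve("What are the main components of a RAG system?")
#   print(generate_response("What are the main components of a RAG system?", ctx))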
# Streamlit app layout
st.markdown('<div class="title">DocumentsReader</div>', unsafe_allow_html=True)
# About the App section
with st.expander("About App"):
    st.write("""
### About the App: Document-Based RAG Question Answering

This application, developed by **Hamaad Ayub Khan**, combines state-of-the-art **Retrieval-Augmented Generation (RAG)** technology with powerful AI models to answer questions based on the content of uploaded documents.

**Key Features:**
- Advanced Retrieval System
- Generative Answering Capability
- Multi-format Document Support
- Seamless Knowledge Base Update
- Contextually Rich Answers

**Developer Information:** Hamaad Ayub Khan created this application with a commitment to making information retrieval simple, accurate, and accessible.

**Social Links:**
- [GitHub](https://github.com/hakgs1234)
- [LinkedIn](https://linkedin.com/in/hamaadayubkhan)
""")
# Document upload and knowledge base update
uploaded_file = st.file_uploader("Upload a document", type=["pdf", "docx", "pptx", "txt"])
if uploaded_file:
    file_text = ""
    if uploaded_file.type == "application/pdf":
        # Extract text page by page; pages without extractable text yield None, so fall back to ""
        file_text = "\n".join([page.extract_text() or "" for page in PdfReader(uploaded_file).pages])
    elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        file_text = "\n".join([para.text for para in Document(uploaded_file).paragraphs])
    elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
        file_text = "\n".join([shape.text for slide in Presentation(uploaded_file).slides for shape in slide.shapes if hasattr(shape, "text")])
    elif uploaded_file.type == "text/plain":
        file_text = uploaded_file.read().decode("utf-8")
    documents.append(file_text)
    document_embeddings = retriever.encode(documents, convert_to_tensor=True)
    st.success("Document content successfully added to the knowledge base.")
# Question input and output handling
question = st.text_input("Enter your question:")
# If a question was entered, retrieve context, generate an answer, and display it below the input field
if question:
    retrieved_context = retrieve(question)
    answer = generate_response(question, retrieved_context) if retrieved_context else "I'm unable to find relevant information in the knowledge base."
    # Display the answer below the input field
    st.markdown("### Answer:")
    st.write(answer)