# NOTE(review): the lines below are Hugging Face Spaces page artifacts
# (status, file size, commit hashes, and the line-number gutter) that were
# scraped together with the source; commented out so the file parses as Python.
# Spaces: Sleeping / Sleeping
# File size: 6,323 Bytes
# Commits: 560fd9c ef6aa40 1500788 c831fa2 94267e6 7c5fb2b
from langchain_community.llms import HuggingFaceHub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
import gradio as gr
import os
# Silence all library warnings: swap warnings.warn for a no-op and add a
# blanket "ignore" filter so nothing reaches the console.
import warnings


def warn(*args, **kwargs):
    """No-op replacement for warnings.warn; swallows every warning."""


warnings.warn = warn
warnings.filterwarnings("ignore")
# LLM factory
def get_llm(model_id, temperature, max_new_tokens):
    """Build a HuggingFaceHub LLM client for the given repo id.

    Reads the API token from the HUGGINGFACEHUB_API_TOKEN environment
    variable and forwards the sampling settings as model kwargs.
    """
    api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
    }
    return HuggingFaceHub(
        repo_id=model_id,
        model_kwargs=generation_kwargs,
        huggingfacehub_api_token=api_token,
    )
# Document Loader
def document_loader(file):
    """Load a PDF into a list of LangChain Documents (one per page).

    Accepts either a plain filesystem path string — which is what the
    gr.File component with type="filepath" actually passes to the fn —
    or a file-like/tempfile object exposing ``.name``. The original code
    unconditionally read ``file.name`` and crashed with AttributeError
    when handed a str path.
    """
    path = file if isinstance(file, str) else file.name
    loader = PyPDFLoader(path)
    return loader.load()
# Text Splitter
def text_splitter(data):
    """Split loaded documents into ~1000-character chunks with 50-char overlap."""
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=50,
        length_function=len,
    )
    return splitter.split_documents(data)
# Vector store
def vector_database(chunks, embedding_model_name):
    """Embed the chunks with the named HF model and index them in Chroma."""
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
    return Chroma.from_documents(chunks, embeddings)
# Embedding model factory
# NOTE(review): not referenced by the pipeline below — vector_database builds
# its own HuggingFaceEmbeddings instance.
def huggingface_embeddings(model_name):
    """Return a HuggingFaceEmbeddings wrapper for the given model name."""
    return HuggingFaceEmbeddings(model_name=model_name)
# Retriever pipeline: PDF -> pages -> chunks -> Chroma -> retriever
def retriever(file, embedding_model_name):
    """Build a vector-store retriever over the uploaded PDF."""
    pages = document_loader(file)
    doc_chunks = text_splitter(pages)
    store = vector_database(doc_chunks, embedding_model_name)
    return store.as_retriever()
# Question-answering chain: wire the chosen LLM and the PDF retriever into a
# "stuff" RetrievalQA chain and return only the answer text.
def retriever_qa(file, query, llm_model, temperature, max_new_tokens, embedding_model):
    """Answer `query` against the uploaded PDF with the selected models/settings."""
    llm = get_llm(llm_model, temperature, max_new_tokens)
    docs_retriever = retriever(file, embedding_model)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=docs_retriever,
        return_source_documents=False,
    )
    return qa_chain.invoke(query)["result"]
# Hugging Face Hub repo ids offered in the "LLM Model" dropdown.
llm_models = [
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "EleutherAI/gpt-neo-2.7B",
    "google/flan-t5-large",
    "HuggingFaceH4/zephyr-7b-beta",
    "deepseek-ai/DeepSeek-V3",
]
# sentence-transformers models offered in the "Embedding Model" dropdown.
embedding_models = [
    "sentence-transformers/all-distilroberta-v1",
    "sentence-transformers/all-mpnet-base-v2",
    "sentence-transformers/all-MiniLM-L6-v2",
]
# CSS injected into the Gradio app: constrains the layout width, applies a
# green-gradient button with a hover lift, and rounds form/box corners.
custom_css = """
#component-0 {
max-width: 800px;
margin: auto;
padding: 20px;
}
.gradio-container {
font-family: 'Arial', sans-serif;
}
.gr-button {
background: linear-gradient(90deg, #4CAF50 0%, #45a049 100%);
border: none;
color: white;
}
.gr-button:hover {
background: linear-gradient(90deg, #45a049 0%, #4CAF50 100%);
transform: translateY(-2px);
box-shadow: 0 5px 15px rgba(0,0,0,0.1);
}
.gr-form {
background-color: #ffffff;
border-radius: 12px;
padding: 20px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.gr-box {
border-radius: 8px;
border: 1px solid #e0e0e0;
}
"""
# Gradio Interface.
# Fixes vs. original: (1) the title/description/article strings contained
# mojibake from a broken encoding ("π", "π‘", "π«", ...) — replaced with plain
# text; (2) the article advertised "FLAN-T5 XL", which is not one of the
# llm_models choices — the tips now describe the models actually offered.
rag_application = gr.Interface(
    fn=retriever_qa,
    allow_flagging="never",
    inputs=[
        # Single-PDF drag-and-drop; type="filepath" passes a str path to fn.
        gr.File(label="Upload PDF File", file_count="single", file_types=[".pdf"], type="filepath", elem_classes="gr-box"),
        gr.Textbox(label="Input Query", lines=2, placeholder="Type your question here...", elem_classes="gr-box"),
        gr.Dropdown(choices=llm_models, value="HuggingFaceH4/zephyr-7b-beta", label="LLM Model", elem_classes="gr-box"),
        gr.Slider(minimum=0.0, maximum=1.0, value=0.5, step=0.1, label="Temperature", elem_classes="gr-box"),
        gr.Slider(minimum=64, maximum=512, value=128, step=32, label="Max Tokens", elem_classes="gr-box"),
        gr.Dropdown(choices=embedding_models, value="sentence-transformers/all-distilroberta-v1", label="Embedding Model", elem_classes="gr-box"),
    ],
    outputs=gr.Textbox(label="Output"),
    title="QBot - Your PDF Assistant",
    description="""
### Welcome to QBot - Your Intelligent PDF Analysis Companion!
Transform any PDF document into an interactive knowledge base. Ask questions naturally and get precise answers powered by advanced language models.
#### Features:
- Intelligent PDF Processing
- Multiple Language Models
- Customizable Response Settings
- Various Embedding Options
#### How to Use:
1. **Upload PDF**: Drop your document in the file uploader
2. **Ask Questions**: Type any question about your document
3. **Customize Settings**:
   - Choose your preferred Language Model
   - Adjust Temperature (0-1) for response creativity
   - Set Max Tokens for response length
   - Select Embedding Model for document processing
4. **Get Answers**: Receive AI-powered responses from your document
""",
    article="""
#### Advanced Tips:
**Model Selection**:
- Mixtral-8x7B-Instruct / Zephyr-7B-beta: strong general-purpose instruction models
- GPT-Neo 2.7B: lightweight general-purpose queries
- FLAN-T5 Large: efficient for straightforward questions
- DeepSeek-V3: ideal for complex analysis
**Parameter Guide**:
- Temperature: Lower (0.1-0.4) for factual, Higher (0.6-0.9) for creative
- Max Tokens: 128 for brief answers, 256+ for detailed explanations
- Embedding Models: Choose based on document complexity and language
Powered by LangChain and Hugging Face.
Made for seamless document interaction.
""",
    theme=gr.themes.Soft(
        primary_hue="green",
        secondary_hue="gray",
        neutral_hue="gray",
        radius_size=gr.themes.sizes.radius_sm,
        font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
    ),
    css=custom_css,
)
# Launch app.
# Fix: the original file ended with a stray "|" character (scrape artifact),
# which is a SyntaxError in Python — removed.
# share=True exposes a temporary public Gradio URL in addition to localhost.
rag_application.launch(share=True)