File size: 1,678 Bytes
5f0ff3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import gradio as gr
from llama_index import Document, GPTSimpleVectorIndex, ServiceContext, SimpleDirectoryReader
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load tokenizer and model from HuggingFace (StableLM).
# NOTE(review): this downloads/loads multi-GB weights at import time,
# before the UI starts — consider lazy loading if startup time matters.
tokenizer = AutoTokenizer.from_pretrained("StabilityAI/stablelm-tuned-alpha-3b")
model = AutoModelForCausalLM.from_pretrained("StabilityAI/stablelm-tuned-alpha-3b")

# Create service context for the LLM.
# NOTE(review): ServiceContext.from_defaults expects an LLMPredictor/LLM
# object for llm_predictor, not a raw (model, tokenizer) tuple — as written
# this will likely fail at query time. Wrap the model in a llama_index LLM
# adapter (e.g. HuggingFaceLLM) instead — TODO confirm against the pinned
# llama_index version (older releases also spelled the chunk parameter
# chunk_size_limit rather than chunk_size).
service_context = ServiceContext.from_defaults(
    llm_predictor=(model, tokenizer),  # Attach the model and tokenizer
    chunk_size=1024
)

# Function to load PDF
def load_pdf(file):
    """Extract the plain text from every page of an uploaded PDF.

    Parameters
    ----------
    file : a gradio upload (tempfile-like object exposing the path via
        ``.name``) or a plain path string.

    Returns
    -------
    str
        The concatenated text of all pages. Pages with no extractable
        text (e.g. scanned images) contribute an empty string.
    """
    # Newer gradio versions pass a plain path string; older ones pass a
    # tempfile-like object — accept both without changing the interface.
    path = getattr(file, "name", file)
    reader = PdfReader(path)
    # extract_text() returns None for pages with no text layer; the
    # original `text += page.extract_text()` would raise TypeError there.
    # "".join also avoids quadratic string concatenation on large PDFs.
    return "".join(page.extract_text() or "" for page in reader.pages)

# Function to create an index and query it
def chat_with_pdf(pdf, query):
    """Build a one-document vector index from *pdf* and answer *query*.

    Parameters
    ----------
    pdf : the uploaded PDF file (as handed over by gradio).
    query : str, the user's question about the document.

    Returns
    -------
    str
        The LLM's answer text.
    """
    # Read the PDF content
    pdf_text = load_pdf(pdf)

    # llama_index expects Document objects, not raw strings — passing
    # [pdf_text] directly makes from_documents fail on the str type.
    documents = [Document(pdf_text)]
    # NOTE(review): the index is rebuilt (re-embedding the whole PDF) on
    # every query; caching per-file would avoid repeated embedding cost.
    index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)

    # Query the index
    response = index.query(query)
    return response.response

# Gradio interface
def chatbot(pdf, query):
    """Gradio handler: validate the inputs, then delegate to chat_with_pdf.

    Returns the chatbot's answer, or a prompt asking the user to supply
    both a PDF and a question when either is missing.
    """
    # Both an uploaded PDF and a non-empty question are required.
    if pdf and query:
        return chat_with_pdf(pdf, query)
    return "Please upload a PDF and enter a query."

# Define Gradio inputs and interface.
# The gr.inputs / gr.outputs namespaces were deprecated in Gradio 2.x and
# removed in 3.x — use the top-level component classes instead.
pdf_input = gr.File(label="Upload your PDF")
query_input = gr.Textbox(label="Ask a question about the PDF")
output = gr.Textbox(label="Chatbot Response")

# Launch the app (blocks until the server is stopped).
gr.Interface(
    fn=chatbot,
    inputs=[pdf_input, query_input],
    outputs=output,
    title="PDF Chatbot",
).launch()