File size: 2,228 Bytes
e4a1f31
8cf0fcf
23c47e2
e4a1f31
7b208e8
12978ef
8cf0fcf
 
 
 
 
eb88e53
8cf0fcf
eb88e53
12978ef
e4a1f31
12978ef
663bca5
e4a1f31
12978ef
e4a1f31
272eebb
 
12978ef
272eebb
 
 
09ef786
272eebb
 
 
 
 
 
 
 
12978ef
 
272eebb
 
 
 
 
 
 
12978ef
011e1bd
 
 
 
 
 
 
2de15d0
 
011e1bd
7b208e8
e4a1f31
272eebb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import streamlit as st
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, QuestionAnsweringPipeline
from PyPDF2 import PdfReader

# Function to perform question-answering
def question_answering(questions, pdf_text, model_name="distilbert-base-cased-distilled-squad"):
    """Answer one or more questions against the text extracted from a PDF.

    Args:
        questions: A single question string, or a list/tuple of question
            strings that are all answered against the same context.
        pdf_text: The context text in which the answers are searched.
        model_name: Hugging Face identifier of the question-answering model
            (and matching tokenizer) to load.

    Returns:
        For a list/tuple of questions, a list with one pipeline result per
        question, in input order (an empty list when no questions are
        given). For a single question string, the pipeline's result for
        that question is returned unchanged.
    """
    is_batch = isinstance(questions, (list, tuple))

    # Nothing to ask: skip the (expensive) model load entirely.
    if is_batch and not questions:
        return []

    # NOTE: the model and tokenizer are loaded on every call.
    model = AutoModelForQuestionAnswering.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Create a QuestionAnsweringPipeline instance
    question_answerer = QuestionAnsweringPipeline(model=model, tokenizer=tokenizer)

    answers = question_answerer(
        question=list(questions) if is_batch else questions,
        context=pdf_text,
    )

    # The pipeline can collapse a one-question batch into a bare result dict;
    # re-wrap it so a list of questions always yields a list of results.
    if is_batch and isinstance(answers, dict):
        answers = [answers]

    return answers

def _parse_questions(raw_text):
    """Split newline-separated input into a list of non-blank, trimmed questions."""
    return [line.strip() for line in raw_text.splitlines() if line.strip()]


def main():
    """Run the Streamlit app: upload a PDF, collect questions, show answers.

    The page asks for a single PDF upload, extracts the text of every page,
    reads newline-separated questions from a text area and, once the
    "Get Answers" button is pressed, displays the answer and score returned
    by ``question_answering`` for each question.
    """
    st.title("Question Answering on PDF Files")

    # Allow user to upload a single PDF file
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

    if not uploaded_file:
        st.warning("Please upload a PDF file.")
        return

    st.subheader(f"Processing PDF file: {uploaded_file.name}")

    if uploaded_file.size == 0:
        st.error(f"Error: File '{uploaded_file.name}' is empty.")
        return

    # Only the extraction needs the file handle, so keep the `with` scope tight.
    # The upload is untrusted input: report parse failures in the UI instead of
    # letting them propagate out of the script.
    try:
        with uploaded_file:
            pdf_reader = PdfReader(uploaded_file)
            # `or ""` guards against a page for which no text comes back.
            pdf_text = "\n".join(
                (pdf_page.extract_text() or "") for pdf_page in pdf_reader.pages
            )
    except Exception as exc:
        st.error(f"Error: Could not read PDF file '{uploaded_file.name}': {exc}")
        return

    # Without any text there is no context to search for answers in.
    if not pdf_text.strip():
        st.error(f"Error: No extractable text found in '{uploaded_file.name}'.")
        return

    # Get questions from the user (allow for multiple questions separated by newlines)
    user_input = st.text_area("Enter your question(s) separated by newlines:")
    questions = _parse_questions(user_input)

    if not st.button("Get Answers"):
        return

    # Checked after the click so the button stays visible while the box is empty.
    if not questions:
        st.warning("No questions entered.")
        return

    # Perform question-answering
    answers = question_answering(questions, pdf_text)

    # A single question may come back as one bare result dict rather than a
    # list; normalise so each question is paired with its own result below.
    if isinstance(answers, dict):
        answers = [answers]

    st.subheader("Questions and Answers:")
    for number, (question, answer) in enumerate(zip(questions, answers), start=1):
        st.write(f"Question {number}: '{question}'")
        st.write("Answer:", answer['answer'])  # Access the answer directly
        st.write(f"Score: {answer['score']:.2f}")  # Format the score to 2 decimal places
        st.write("")  # Add a new line after each answer

# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()