File size: 2,228 Bytes
e4a1f31 8cf0fcf 23c47e2 e4a1f31 7b208e8 12978ef 8cf0fcf eb88e53 8cf0fcf eb88e53 12978ef e4a1f31 12978ef 663bca5 e4a1f31 12978ef e4a1f31 272eebb 12978ef 272eebb 09ef786 272eebb 12978ef 272eebb 12978ef 011e1bd 2de15d0 011e1bd 7b208e8 e4a1f31 272eebb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import streamlit as st
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, QuestionAnsweringPipeline
from PyPDF2 import PdfReader
def question_answering(questions, pdf_text):
    """Answer one or more questions using ``pdf_text`` as the context.

    Args:
        questions: A single question string, or a list/tuple of question
            strings that are all answered against the same context.
        pdf_text: The text in which answers are searched.

    Returns:
        When ``questions`` is a list or tuple: a list of result dicts, one
        per question and in the same order. When ``questions`` is a single
        string: whatever the pipeline returns for one question (unchanged).
        Callers read the ``'answer'`` and ``'score'`` keys of each result.
    """
    # NOTE: the model and tokenizer are loaded anew on every call.
    model_name = "distilbert-base-cased-distilled-squad"
    model = AutoModelForQuestionAnswering.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    question_answerer = QuestionAnsweringPipeline(model=model, tokenizer=tokenizer)
    answers = question_answerer(question=questions, context=pdf_text)

    # For a one-element question list the pipeline may hand back a bare dict
    # instead of a one-element list. Normalise that case so a caller that
    # zips questions with answers gets one result per question rather than
    # iterating over the dict's keys.
    if isinstance(answers, dict) and isinstance(questions, (list, tuple)):
        answers = [answers]
    return answers
def main():
    """Run the Streamlit page: upload a PDF, collect questions, show answers.

    The page stops early (with a warning or error message) when no file is
    uploaded, the file is empty, no text can be extracted from it, or no
    non-blank question has been entered.
    """
    st.title("Question Answering on PDF Files")

    # Allow the user to upload a single PDF file.
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if not uploaded_file:
        st.warning("Please upload a PDF file.")
        return

    st.subheader(f"Processing PDF file: {uploaded_file.name}")
    if uploaded_file.size == 0:
        st.error(f"Error: File '{uploaded_file.name}' is empty.")
        return

    with uploaded_file:
        pdf_reader = PdfReader(uploaded_file)
        # `or ""` keeps the join safe in case a page yields no text (None).
        pdf_text = "\n".join(
            pdf_page.extract_text() or "" for pdf_page in pdf_reader.pages
        )

    # Without any extracted text there is nothing to search for answers in
    # (for example, a PDF that consists only of scanned images).
    if not pdf_text.strip():
        st.error(f"Error: No text could be extracted from '{uploaded_file.name}'.")
        return

    # Get questions from the user (one per line). Blank lines are dropped:
    # "".split("\n") would yield [""], which is truthy and would let an empty
    # question slip past the check below.
    user_input = st.text_area("Enter your question(s) separated by newlines:")
    questions = [line.strip() for line in user_input.splitlines() if line.strip()]
    if not questions:
        st.warning("No questions entered.")
        return

    if st.button("Get Answers"):
        answers = question_answering(questions, pdf_text)
        # A single question may come back as one dict rather than a list;
        # wrap it so zip() pairs each question with its own result.
        if isinstance(answers, dict):
            answers = [answers]

        st.subheader("Questions and Answers:")
        for number, (question, answer) in enumerate(zip(questions, answers), start=1):
            st.write(f"Question {number}: '{question}'")
            st.write("Answer:", answer['answer'])
            st.write(f"Score: {answer['score']:.2f}")  # two decimal places
            st.write("")  # blank line between results
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()