|
import streamlit as st |
|
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, QuestionAnsweringPipeline |
|
from PyPDF2 import PdfReader |
|
|
|
|
|
def question_answering(questions, pdf_text):
    """Answer one or more questions using ``pdf_text`` as the shared context.

    Loads the ``distilbert-base-cased-distilled-squad`` checkpoint and its
    tokenizer, wraps them in a ``QuestionAnsweringPipeline`` and runs every
    question against the same context.

    Args:
        questions: Either a single question string or an iterable of
            question strings.
        pdf_text: The text the answers are extracted from.

    Returns:
        For an iterable of questions: a list with one pipeline result per
        question, in input order (an empty list when no questions are
        given). For a single ``str`` question: whatever the pipeline
        returns for it, unchanged. The result entries are read by the
        caller through their ``'answer'`` and ``'score'`` keys.
    """
    single_question = isinstance(questions, str)
    if not single_question:
        questions = list(questions)
        if not questions:
            # Nothing to ask: skip the (expensive) model load entirely.
            return []

    model_name = "distilbert-base-cased-distilled-squad"
    # NOTE(review): the model and tokenizer are reloaded on every call;
    # consider caching them if this is invoked repeatedly.
    model = AutoModelForQuestionAnswering.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    question_answerer = QuestionAnsweringPipeline(model=model, tokenizer=tokenizer)

    answers = question_answerer(question=questions, context=pdf_text)

    # The pipeline is documented to return "a dict or a list of dict": a
    # one-question batch comes back as a bare dict. Re-wrap it so a list of
    # questions always yields a list of results that can be zipped 1:1.
    if not single_question and isinstance(answers, dict):
        answers = [answers]

    return answers
|
|
|
def main():
    """Run the Streamlit page: upload a PDF, enter questions, show answers.

    Flow:
        1. Ask for a PDF upload; stop with a warning if none is provided.
        2. Reject zero-byte uploads and PDFs with no extractable text.
        3. Read newline-separated questions from a text area, ignoring
           blank lines; stop with a warning if none remain.
        4. On "Get Answers", call ``question_answering`` and render each
           question with its answer and score.
    """
    st.title("Question Answering on PDF Files")

    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if not uploaded_file:
        st.warning("Please upload a PDF file.")
        return

    st.subheader(f"Processing PDF file: {uploaded_file.name}")

    if uploaded_file.size == 0:
        st.error(f"Error: File '{uploaded_file.name}' is empty.")
        return

    with uploaded_file:
        pdf_reader = PdfReader(uploaded_file)
        # ``or ""`` keeps the join safe if a page yields no text at all.
        pdf_text = "\n".join(
            pdf_page.extract_text() or "" for pdf_page in pdf_reader.pages
        )

    if not pdf_text.strip():
        # Without any context text there is nothing to extract answers from.
        st.error(f"Error: No text could be extracted from '{uploaded_file.name}'.")
        return

    user_input = st.text_area("Enter your question(s) separated by newlines:")
    # "".split("\n") is [""] (truthy), so filter blank lines explicitly;
    # otherwise the "no questions" guard below can never trigger.
    questions = [line.strip() for line in user_input.splitlines() if line.strip()]

    if not questions:
        st.warning("No questions entered.")
        return

    if st.button("Get Answers"):
        answers = question_answering(questions, pdf_text)
        # Guard against a bare dict result (the Transformers QA pipeline
        # returns one for a single question) so zip() pairs each question
        # with a result dict rather than with the dict's keys.
        if isinstance(answers, dict):
            answers = [answers]

        st.subheader("Questions and Answers:")
        for i, (question, answer) in enumerate(zip(questions, answers), start=1):
            st.write(f"Question {i}: '{question}'")
            st.write("Answer:", answer['answer'])
            st.write(f"Score: {answer['score']:.2f}")
            st.write("")
|
|
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()