"""Streamlit app that answers user questions about an uploaded PDF file."""

import streamlit as st
from PyPDF2 import PdfReader
from transformers import (
    AutoModelForQuestionAnswering,
    AutoTokenizer,
    QuestionAnsweringPipeline,
)


def question_answering(questions, pdf_text):
    """Answer one or more questions using *pdf_text* as the context.

    Args:
        questions: A single question string, or a list/tuple of question
            strings that are all answered against the same context.
        pdf_text: The text the answers are extracted from.

    Returns:
        For a list/tuple of questions, a list with one answer dict per
        question, in order (an empty list for no questions). For a single
        question string, whatever the pipeline returns for that one example.
        The caller in this file reads the ``"answer"`` and ``"score"`` keys
        of each answer dict.
    """
    is_batch = isinstance(questions, (list, tuple))
    if is_batch and not questions:
        # Nothing to ask: skip the (expensive) model load entirely.
        return []

    # NOTE: the model and tokenizer are loaded on every call. If this becomes
    # a bottleneck, consider caching them (e.g. with Streamlit's resource
    # caching) -- left unchanged here to avoid depending on a specific version.
    model_name = "distilbert-base-cased-distilled-squad"
    model = AutoModelForQuestionAnswering.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    question_answerer = QuestionAnsweringPipeline(model=model, tokenizer=tokenizer)

    answers = question_answerer(
        question=list(questions) if is_batch else questions,
        context=pdf_text,
    )

    # The pipeline returns a bare dict instead of a one-element list when it
    # is given exactly one example; normalise so that callers can always zip
    # a list of questions with a list of answers.
    if is_batch and isinstance(answers, dict):
        answers = [answers]
    return answers


def _parse_questions(raw_text):
    """Return the non-blank lines of *raw_text*, stripped of outer whitespace."""
    stripped_lines = (line.strip() for line in raw_text.splitlines())
    return [line for line in stripped_lines if line]


def _extract_pdf_text(pdf_file):
    """Return the text of every page in *pdf_file*, joined by newlines."""
    reader = PdfReader(pdf_file)
    # Defensive: treat a page that yields no text as an empty string so the
    # join cannot fail on a non-string value.
    return "\n".join(page.extract_text() or "" for page in reader.pages)


def main():
    """Render the page: upload a PDF, collect questions, and show answers."""
    st.title("Question Answering on PDF Files")

    # Allow the user to upload a single PDF file.
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if not uploaded_file:
        st.warning("Please upload a PDF file.")
        return

    st.subheader(f"Processing PDF file: {uploaded_file.name}")

    if uploaded_file.size == 0:
        st.error(f"Error: File '{uploaded_file.name}' is empty.")
        return

    with uploaded_file:
        pdf_text = _extract_pdf_text(uploaded_file)

    # Without any context text the model has nothing to extract answers from.
    if not pdf_text.strip():
        st.error(f"Error: No extractable text found in '{uploaded_file.name}'.")
        return

    # Get questions from the user (multiple questions separated by newlines).
    user_input = st.text_area("Enter your question(s) separated by newlines:")
    questions = _parse_questions(user_input)
    if not questions:
        st.warning("No questions entered.")
        return

    if st.button("Get Answers"):
        answers = question_answering(questions, pdf_text)

        st.subheader("Questions and Answers:")
        for number, (question, answer) in enumerate(zip(questions, answers), start=1):
            st.write(f"Question {number}: '{question}'")
            st.write("Answer:", answer["answer"])
            st.write(f"Score: {answer['score']:.2f}")  # two decimal places
            st.write("")  # blank line after each answer


if __name__ == "__main__":
    main()