legal-qa / app.py
ivyblossom's picture
Update app.py
2de15d0
raw
history blame
2.23 kB
import streamlit as st
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, QuestionAnsweringPipeline
from PyPDF2 import PdfReader
def question_answering(questions, pdf_text):
    """Answer one or more questions using ``pdf_text`` as the context.

    Loads the ``distilbert-base-cased-distilled-squad`` checkpoint, wraps it
    in a ``QuestionAnsweringPipeline`` and runs every question against the
    same context.

    Args:
        questions: A single question string, or a list of question strings.
        pdf_text: The text the answers are extracted from.

    Returns:
        When ``questions`` is a list: a list with one result per question, in
        the same order (``[]`` for an empty list). When ``questions`` is a
        single string: whatever the pipeline returns for that one question.
        The caller in this file reads the ``'answer'`` and ``'score'`` keys
        of each result.
    """
    # Nothing to ask: skip the expensive model load entirely.
    if isinstance(questions, list) and not questions:
        return []

    # Load the model and tokenizer.
    # NOTE(review): this reloads the checkpoint on every call; consider
    # Streamlit's resource caching if the installed version provides it.
    model_name = "distilbert-base-cased-distilled-squad"
    model = AutoModelForQuestionAnswering.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Create a QuestionAnsweringPipeline instance
    question_answerer = QuestionAnsweringPipeline(model=model, tokenizer=tokenizer)
    answers = question_answerer(question=questions, context=pdf_text)

    # The pipeline collapses a single result into a bare dict. Re-wrap it so
    # that a list of questions always yields a list of results; otherwise
    # callers that zip questions with answers would iterate the dict's keys.
    if isinstance(questions, list) and isinstance(answers, dict):
        return [answers]
    return answers
def main():
    """Run the Streamlit page: upload a PDF, ask questions, show the answers.

    The page renders in stages and returns early (with a warning or an error
    message) when a stage has nothing to work with: no file uploaded, an
    empty file, a PDF without extractable text, or no questions entered.
    """
    st.title("Question Answering on PDF Files")

    # Allow user to upload a single PDF file
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if not uploaded_file:
        st.warning("Please upload a PDF file.")
        return

    st.subheader(f"Processing PDF file: {uploaded_file.name}")
    if uploaded_file.size == 0:
        st.error(f"Error: File '{uploaded_file.name}' is empty.")
        return

    with uploaded_file:
        pdf_reader = PdfReader(uploaded_file)
        # `or ""` guards against pages for which no text comes back, which
        # would otherwise make str.join fail on a non-string item.
        pdf_text = "\n".join(
            pdf_page.extract_text() or "" for pdf_page in pdf_reader.pages
        )

    # An image-only or otherwise text-less PDF gives the model no context.
    if not pdf_text.strip():
        st.error(f"Error: No extractable text found in '{uploaded_file.name}'.")
        return

    # Get questions from the user (allow for multiple questions separated by newlines)
    user_input = st.text_area("Enter your question(s) separated by newlines:")
    # Drop blank lines: "".split("\n") is [''], which is truthy, so the
    # emptiness check below would never fire without this filtering.
    questions = [line.strip() for line in user_input.splitlines() if line.strip()]
    if not questions:
        st.warning("No questions entered.")
        return

    if st.button("Get Answers"):
        # Perform question-answering
        answers = question_answering(questions, pdf_text)
        # A single result may come back as a bare dict; wrap it so the zip
        # below pairs the question with the result rather than a dict key.
        if isinstance(answers, dict):
            answers = [answers]

        st.subheader("Questions and Answers:")
        for number, (question, answer) in enumerate(zip(questions, answers), start=1):
            st.write(f"Question {number}: '{question}'")
            st.write("Answer:", answer['answer'])  # Access the answer directly
            st.write(f"Score: {answer['score']:.2f}")  # Format the score to 2 decimal places
            st.write("")  # Add a new line after each answer
# Script entry point: render the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()