|
import os |
|
import streamlit as st |
|
from transformers import pipeline |
|
from PyPDF2 import PdfReader |
|
import tempfile |
|
|
|
|
|
@st.cache_resource(show_spinner=False)
def _load_qa_pipeline():
    """Build the extractive question-answering pipeline once and reuse it.

    The model is a shared, read-only resource, so it is cached with
    ``st.cache_resource`` rather than being rebuilt every time
    ``question_answering`` misses its data cache (i.e. for every new
    question or document).
    """
    return pipeline(
        "question-answering",
        model="distilbert-base-cased-distilled-squad",
        tokenizer="distilbert-base-cased-distilled-squad",
    )


@st.cache_data(show_spinner=False)
def question_answering(questions, pdf_text):
    """Answer each question using ``pdf_text`` as the context.

    Args:
        questions: Iterable of question strings.
        pdf_text: Text the answers are extracted from.

    Returns:
        A list with exactly one result dict per entry of ``questions``, in the
        same order, so callers can ``zip`` questions with answers. Each dict
        has at least the keys ``"answer"`` and ``"score"``. Blank questions,
        or any question asked against a blank context, get an empty answer
        with score ``0.0`` instead of being sent to the model.
    """

    def _no_answer():
        # Same keys the question-answering pipeline reports, so downstream
        # code can treat placeholders and real results uniformly.
        return {"score": 0.0, "start": 0, "end": 0, "answer": ""}

    questions = list(questions)

    # Nothing can be extracted from an empty document; skip loading the model.
    if not pdf_text or not pdf_text.strip():
        return [_no_answer() for _ in questions]

    question_answerer = None
    answers = []
    for question in questions:
        # The pipeline rejects empty questions with a ValueError; a blank line
        # in the text area (or an empty text area) would otherwise crash the app.
        if not question or not question.strip():
            answers.append(_no_answer())
            continue
        if question_answerer is None:
            question_answerer = _load_qa_pipeline()
        answers.append(question_answerer(question=question, context=pdf_text))

    return answers
|
|
|
def main():
    """Render the "Legal QA" page: upload a PDF, ask questions, show answers.

    The page shows a PDF uploader, a multi-line question box and an "Answer"
    button. When the button is pressed, the PDF text is extracted, every
    non-blank question line is passed to ``question_answering`` and the
    results are rendered as a table of question, answer and score.
    """
    st.title("Legal QA")

    uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])

    st.write("Enter your question(s) below (separate multiple questions with new lines):")
    raw_questions = st.text_area("Questions")
    # Drop blank lines (an empty box or trailing newline would otherwise be
    # sent to the model as an empty question) and trim stray whitespace.
    questions = [line.strip() for line in raw_questions.splitlines() if line.strip()]

    if not st.button("Answer"):
        return

    # Give explicit feedback instead of silently ignoring the click.
    if uploaded_file is None:
        st.warning("Please upload a PDF file first.")
        return
    if not questions:
        st.warning("Please enter at least one question.")
        return

    # Parse the upload directly from memory. Writing it to the shared temp
    # directory under the client-supplied file name left documents on disk
    # and let concurrent uploads with the same name overwrite each other.
    pdf_reader = PdfReader(uploaded_file)
    # Pages without a text layer may yield nothing; treat that as "".
    pdf_text = "\n".join(pdf_page.extract_text() or "" for pdf_page in pdf_reader.pages)

    if not pdf_text.strip():
        st.error("No extractable text was found in this PDF.")
        return

    answers = question_answering(questions, pdf_text)

    table_data = [["Question", "Answer", "Score"]]
    table_data.extend(
        [question, answer["answer"], f"{answer['score']:.2f}"]
        for question, answer in zip(questions, answers)
    )

    st.write("Questions and Answers:")
    st.table(table_data)
|
|
|
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()