File size: 1,794 Bytes
4688ae4 e4a1f31 e776a42 23c47e2 4688ae4 e4a1f31 7b208e8 ed5b1fa e776a42 c7c4e48 e4a1f31 12978ef 663bca5 ed5b1fa a9e9dbd ed5b1fa 4688ae4 272eebb 4688ae4 12978ef e776a42 011e1bd 75c6f14 ed5b1fa 3dd11fc 92bd63a 75c6f14 7b208e8 ed5b1fa 6d20f5a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import os
import streamlit as st
from transformers import pipeline
from PyPDF2 import PdfReader
import tempfile
@st.cache_resource(show_spinner=False)
def _load_question_answerer():
    """Build the extractive question-answering pipeline used by this app.

    Wrapped in ``st.cache_resource`` so the model and tokenizer are
    constructed once and reused, instead of being rebuilt every time
    ``question_answering`` misses its data cache.
    """
    return pipeline(
        "question-answering",
        model="distilbert-base-cased-distilled-squad",
        tokenizer="distilbert-base-cased-distilled-squad",
    )


# Function to perform question-answering
@st.cache_data(show_spinner=False)
def question_answering(questions, pdf_text):
    """Answer each question against the given document text.

    Results are memoized by ``st.cache_data`` on the
    ``(questions, pdf_text)`` arguments.

    Args:
        questions: Iterable of question strings.
        pdf_text: The text used as context for every question.

    Returns:
        A list with one pipeline result per question, in the same order
        as ``questions``. The caller in this file reads the ``'answer'``
        and ``'score'`` keys of each result.
    """
    question_answerer = _load_question_answerer()
    return [
        question_answerer(question=question, context=pdf_text)
        for question in questions
    ]
def main():
    """Render the "Legal QA" page: upload a PDF, ask questions, show answers.

    The page shows a PDF uploader, a multi-line question box (one question
    per line) and an "Answer" button. When the button is pressed, the text
    of every PDF page is extracted and joined, each question is answered
    against that text via ``question_answering``, and the results are
    shown as a table of question, answer and score (two decimals).

    A warning or error is shown instead of running the model when no file
    is uploaded, no non-blank question is entered, or the PDF yields no
    extractable text.
    """
    st.title("Legal QA")
    uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
    st.write("Enter your question(s) below (separate multiple questions with new lines):")
    # Drop blank lines (an empty text area would otherwise produce one empty
    # question, which the question-answering pipeline rejects).
    questions = [
        line.strip()
        for line in st.text_area("Questions").splitlines()
        if line.strip()
    ]

    # The button is always rendered; everything below only runs on a click.
    if not st.button("Answer"):
        return
    if uploaded_file is None:
        st.warning("Please upload a PDF file first.")
        return
    if not questions:
        st.warning("Please enter at least one question.")
        return

    # Read the upload straight from memory instead of copying it to a
    # fixed path in the shared temp directory: that copy was never removed
    # and could collide with another upload of the same filename.
    uploaded_file.seek(0)
    pdf_reader = PdfReader(uploaded_file)
    # Guard against pages that yield no text so the join cannot fail on None.
    pdf_text = "\n".join(
        (pdf_page.extract_text() or "") for pdf_page in pdf_reader.pages
    )
    if not pdf_text.strip():
        st.error("No extractable text was found in the uploaded PDF.")
        return

    answers = question_answering(questions, pdf_text)

    # First row of the table carries the column titles.
    table_data = [["Question", "Answer", "Score"]]
    for question, answer in zip(questions, answers):
        table_data.append([question, answer['answer'], f"{answer['score']:.2f}"])
    st.write("Questions and Answers:")
    st.table(table_data)
# Run the app only when this file is executed as a script
# (e.g. via `streamlit run`), not when it is imported.
if __name__ == "__main__":
    main()