legal-qa

Sleeping

App Files Files Community

legal-qa / app.py

atlury

Update app.py

a9e9dbd verified over 1 year ago

raw

history blame contribute delete

1.79 kB

	import os
	import streamlit as st
	from transformers import pipeline
	from PyPDF2 import PdfReader
	import tempfile

	@st.cache_resource(show_spinner=False)
	def _load_qa_pipeline():
	    """Build the DistilBERT SQuAD question-answering pipeline once per server process."""
	    # cache_resource keeps a single shared model object alive across reruns
	    # and sessions, so the weights are not reloaded on every cache miss of
	    # question_answering().
	    return pipeline(
	        "question-answering",
	        model="distilbert-base-cased-distilled-squad",
	        tokenizer="distilbert-base-cased-distilled-squad",
	    )


	@st.cache_data(show_spinner=False)
	def question_answering(questions, pdf_text):
	    """Answer each question against the given document text.

	    Args:
	        questions: Sequence of question strings.
	        pdf_text: The context string the answers are extracted from.

	    Returns:
	        A list with exactly one result dict per entry of ``questions``, in
	        the same order. Each dict has the keys ``score``, ``start``, ``end``
	        and ``answer``. Blank questions, or any question asked against a
	        blank context, yield an empty answer with a score of 0.0 instead of
	        being sent to the model.
	    """
	    # The pipeline rejects empty questions/contexts with a ValueError; use a
	    # neutral placeholder so the output stays aligned with the input list.
	    no_answer = {"score": 0.0, "start": 0, "end": 0, "answer": ""}

	    if not pdf_text or not pdf_text.strip():
	        return [dict(no_answer) for _ in questions]

	    question_answerer = _load_qa_pipeline()

	    answers = []
	    for question in questions:
	        if question and question.strip():
	            answers.append(question_answerer(question=question, context=pdf_text))
	        else:
	            answers.append(dict(no_answer))

	    return answers

	def main():
	    """Render the Legal QA page.

	    Lets the user upload a PDF and type one question per line, then, when
	    the "Answer" button is pressed, extracts the PDF text, runs
	    question_answering() on it and shows the questions, answers and scores
	    in a table.
	    """
	    st.title("Legal QA")

	    uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])

	    st.write("Enter your question(s) below (separate multiple questions with new lines):")
	    # Drop blank lines: an empty question makes the QA pipeline raise.
	    raw_lines = st.text_area("Questions").splitlines()
	    questions = [line.strip() for line in raw_lines if line.strip()]

	    if not st.button("Answer"):
	        return
	    if uploaded_file is None:
	        st.warning("Please upload a PDF file first.")
	        return
	    if not questions:
	        st.warning("Please enter at least one question.")
	        return

	    # The upload is already a file-like object, so hand it to PdfReader
	    # directly. This avoids writing a temp file named after the
	    # client-supplied file name (collision/path-injection risk) that was
	    # never cleaned up.
	    pdf_reader = PdfReader(uploaded_file)
	    # `or ""` guards against pages for which no text could be extracted.
	    pdf_text = "\n".join(pdf_page.extract_text() or "" for pdf_page in pdf_reader.pages)
	    if not pdf_text.strip():
	        st.error("No extractable text was found in the uploaded PDF.")
	        return

	    answers = question_answering(questions, pdf_text)

	    # Display the results as a table with a header row
	    table_data = [["Question", "Answer", "Score"]]
	    for question, answer in zip(questions, answers):
	        table_data.append([question, answer['answer'], f"{answer['score']:.2f}"])

	    st.write("Questions and Answers:")
	    st.table(table_data)

	# Entry point: build the page only when this file is executed as the main
	# script, not when it is imported as a module.
	if __name__ == "__main__":
	    main()