Spaces:

adinarayana
/

Sample

Sleeping

App Files Files Community

Sample / app.py

adinarayana

Update app.py

30e1f68 verified about 1 year ago

raw

history blame

2.8 kB

	"""
	from langchain.llms import OpenAI

	# from dotenv import load_dotenv
	import os

	# take environment variables from .env
	# load_dotenv()

	import streamlit as st

	# load OpenAI model and get a response


	def get_openai_response(question):
	llm = OpenAI(
	openai_api_key=os.getenv("OPEN_API_KEY"),
	model_name="gpt-3.5-turbo-instruct",
	temperature=0.6,
	)
	response = llm(question)
	return response
	# modify with chain and other stuff


	## streamlit app

	st.set_page_config(page_title="QandA Demo")
	st.header("Langchain Application")

	input = st.text_input("Input: ", key=input)
	response = get_openai_response(input)


	submit = st.button("Generate")
	if submit:
	st.subheader("The response is")
	st.write(response)
	"""




	import os
	import re
	import pdfminer
	from pdfminer.high_level import extract_pages
	from transformers import pipeline

	import streamlit as st

	def preprocess_text(element):
	    """Return the cleaned text of a single pdfminer layout element.

	    Args:
	        element: A layout object yielded while iterating a page returned by
	            ``extract_pages``.

	    Returns:
	        The element's text with surrounding whitespace stripped and every
	        character that is neither a word character nor whitespace removed,
	        or ``""`` when the element is not an ``LTTextBoxHorizontal``.
	    """
	    # Anything that is not a horizontal text box contributes no text.
	    if not isinstance(element, pdfminer.layout.LTTextBoxHorizontal):
	        return ""

	    stripped = element.get_text().strip()

	    # Drop punctuation and other symbols; swap in a different pattern here if
	    # another clean-up rule is preferred. Stop-word removal and lower-casing
	    # are optional extra steps that are intentionally not applied.
	    return re.sub(r'[^\w\s]', '', stripped)

	def get_openai_response(text, length=100, model="gpt-3.5-turbo-instruct"):
	    """Summarize ``text`` with a ``transformers`` summarization pipeline.

	    Args:
	        text: The text to summarize.
	        length: Passed to the pipeline as ``max_length`` for the summary.
	        model: Model identifier handed to ``pipeline("summarization", ...)``.
	            NOTE(review): the default is an OpenAI model name; it is presumably
	            not loadable by ``transformers`` -- confirm and pass a Hugging Face
	            model id (e.g. "t5-small") explicitly.

	    Returns:
	        Whatever the pipeline call returns; the caller in this file reads
	        ``result[0]["summary_text"]`` from it.
	    """
	    summarizer = pipeline("summarization", model=model)
	    # Text extracted from a whole PDF can be longer than the model's maximum
	    # input size; let the tokenizer truncate it instead of failing.
	    return summarizer(text, max_length=length, truncation=True)

	## Streamlit app

	st.set_page_config(page_title="Trail Demo")
	st.header("PDF Summarizer")

	# User options
	st.subheader("Settings")
	summary_length = st.slider("Summary Length", min_value=50, max_value=500, value=100)
	# Only Hugging Face model ids are offered: the summarizer is loaded through
	# transformers.pipeline, which cannot load the OpenAI-hosted
	# "gpt-3.5-turbo-instruct" model.
	summarization_model = st.selectbox(
	    "Summarization Model",
	    ["t5-small", "facebook/bart-large-cnn"],
	)

	# File upload and processing
	uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
	if uploaded_file is not None:
	    with st.spinner("Processing..."):
	        chunks = []
	        for page_layout in extract_pages(uploaded_file):
	            chunks.extend(preprocess_text(element) for element in page_layout)
	        # Skip empty chunks (non-text elements) so that a PDF without any
	        # extractable text really yields an empty string below.
	        text = "\n".join(chunk for chunk in chunks if chunk)

	    if text:
	        submit = st.button("Generate Summary")
	        if submit:
	            # st.spinner only displays while used as a context manager.
	            with st.spinner("Summarizing..."):
	                response = get_openai_response(
	                    text, length=summary_length, model=summarization_model
	                )
	            st.subheader("Summary")
	            st.write(response[0]["summary_text"])
	    else:
	        st.error("No text found in the PDF.")