Sample / app.py
adinarayana's picture
Update app.py
30e1f68 verified
raw
history blame
2.8 kB
"""
from langchain.llms import OpenAI
# from dotenv import load_dotenv
import os
# take environment variables from .env
# load_dotenv()
import streamlit as st
# load OpenAI model and get a response
def get_openai_response(question):
llm = OpenAI(
openai_api_key=os.getenv("OPEN_API_KEY"),
model_name="gpt-3.5-turbo-instruct",
temperature=0.6,
)
response = llm(question)
return response
# modify with chain and other stuff
## streamlit app
st.set_page_config(page_title="QandA Demo")
st.header("Langchain Application")
input = st.text_input("Input: ", key=input)
response = get_openai_response(input)
submit = st.button("Generate")
if submit:
st.subheader("The response is")
st.write(response)
"""
import os
import re
import pdfminer
from pdfminer.high_level import extract_pages
from transformers import pipeline
import streamlit as st
def preprocess_text(element):
    """Return the cleaned text carried by a pdfminer layout element.

    Only ``pdfminer.layout.LTTextBoxHorizontal`` instances produce text;
    every other element yields an empty string. For a text box, the
    surrounding whitespace is stripped and every character that is neither
    a word character nor whitespace is removed.
    """
    # Guard clause: anything that is not a horizontal text box contributes nothing.
    if not isinstance(element, pdfminer.layout.LTTextBoxHorizontal):
        return ""

    stripped = element.get_text().strip()
    # Drop punctuation and other symbols; letters, digits, underscores and
    # whitespace are kept. Stop-word removal and lowercasing are deliberately
    # not applied here.
    return re.sub(r'[^\w\s]', '', stripped)
def get_openai_response(text, length=100, model="gpt-3.5-turbo-instruct"):
    """Summarize *text* with a ``transformers`` summarization pipeline.

    Despite its name, this function does not call the OpenAI API: it builds
    a ``pipeline("summarization", ...)`` and runs *text* through it.

    Args:
        text: The text to summarize.
        length: Forwarded to the pipeline as ``max_length``.
        model: Model identifier forwarded to ``pipeline``.

    Returns:
        The pipeline's result. The caller in this file reads
        ``result[0]["summary_text"]``.
    """
    # NOTE(review): the default "gpt-3.5-turbo-instruct" looks like an OpenAI
    # API model name rather than a checkpoint the pipeline can load -- confirm
    # and consider changing the default.
    summarizer = pipeline("summarization", model=model)
    # The text of a whole PDF routinely exceeds the model's maximum input
    # length; truncating keeps the call from failing on long documents.
    return summarizer(text, max_length=length, truncation=True)
## Streamlit app
st.set_page_config(page_title="Trail Demo")
st.header("PDF Summarizer")

# User options
st.subheader("Settings")
summary_length = st.slider("Summary Length", min_value=50, max_value=500, value=100)
# NOTE(review): "gpt-3.5-turbo-instruct" looks like an OpenAI API model name,
# not something a transformers pipeline can load -- confirm whether it belongs
# in this list. Load failures are reported to the user further down.
summarization_model = st.selectbox(
    "Summarization Model",
    ["gpt-3.5-turbo-instruct", "t5-small", "facebook/bart-large-cnn"],
)

# File upload and processing
# Restrict the picker to PDFs: the extraction below can only parse that format.
uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
if uploaded_file is not None:
    with st.spinner("Processing..."):
        extracted = (
            preprocess_text(element)
            for page_layout in extract_pages(uploaded_file)
            for element in page_layout
        )
        # Keep only elements that produced text, so an image-only PDF yields
        # an empty string and reaches the "No text found" branch.
        text = "\n".join(piece for piece in extracted if piece)

    if text:
        submit = st.button("Generate Summary")
        if submit:
            response = None
            # The spinner is only displayed while its ``with`` block is running.
            with st.spinner("Summarizing..."):
                try:
                    response = get_openai_response(
                        text,
                        length=summary_length,
                        model=summarization_model,
                    )
                except OSError as exc:
                    # Raised when the selected model cannot be resolved or
                    # loaded; show a readable message instead of a traceback.
                    st.error(
                        f"Could not load summarization model "
                        f"'{summarization_model}': {exc}"
                    )
            if response is not None:
                st.subheader("Summary")
                st.write(response[0]["summary_text"])
    else:
        st.error("No text found in the PDF.")