# Spaces: Sleeping
"""
from langchain.llms import OpenAI
# from dotenv import load_dotenv
import os
# take environment variables from .env
# load_dotenv()
import streamlit as st
# load OpenAI model and get a response
def get_openai_response(question):
    llm = OpenAI(
        openai_api_key=os.getenv("OPEN_API_KEY"),
        model_name="gpt-3.5-turbo-instruct",
        temperature=0.6,
    )
    response = llm(question)
    return response
# modify with chain and other stuff
## streamlit app
st.set_page_config(page_title="QandA Demo")
st.header("Langchain Application")
input = st.text_input("Input: ", key=input)
response = get_openai_response(input)
submit = st.button("Generate")
if submit:
    st.subheader("The response is")
    st.write(response)
"""
import os | |
import re | |
import pdfminer | |
from pdfminer.high_level import extract_pages | |
from transformers import pipeline | |
import streamlit as st | |
def preprocess_text(element):
    """Return the cleaned text of a pdfminer layout element.

    Only ``pdfminer.layout.LTTextBoxHorizontal`` elements contribute text;
    any other element yields an empty string.

    Args:
        element: A layout element, as produced by iterating a page layout.

    Returns:
        The element's text with surrounding whitespace stripped and every
        character that is neither a word character nor whitespace removed,
        or ``""`` when the element is not a horizontal text box.
    """
    # Guard clause: anything that is not a horizontal text box has no text
    # we want to keep.
    if not isinstance(element, pdfminer.layout.LTTextBoxHorizontal):
        return ""

    raw = element.get_text()
    # Drop punctuation/symbols. Stop-word removal and lowercasing are
    # deliberately not applied here.
    return re.sub(r'[^\w\s]', '', raw.strip())
def get_openai_response(text, length=100, model="gpt-3.5-turbo-instruct"):
    """Summarize *text* with a ``transformers`` summarization pipeline.

    Args:
        text: The text to summarize.
        length: Value forwarded to the pipeline as ``max_length``.
        model: Model identifier forwarded to ``pipeline``.
            NOTE(review): the default, "gpt-3.5-turbo-instruct", looks like
            an OpenAI API model name rather than a model the ``transformers``
            pipeline can load -- confirm and replace the default if so.

    Returns:
        The pipeline's result unchanged. The caller in this file reads
        ``result[0]["summary_text"]`` from it.
    """
    summarizer = pipeline("summarization", model=model)
    # The whole extracted PDF text is passed in, which can be longer than the
    # model accepts. Ask the pipeline to truncate the input instead of
    # failing; only the leading part of a long text is then summarized.
    return summarizer(text, max_length=length, truncation=True)
## Streamlit app
# Page flow: choose settings, upload a PDF, extract its text, then summarize
# it on demand when the button is pressed.
st.set_page_config(page_title="Trail Demo")
st.header("PDF Summarizer")

# User options
st.subheader("Settings")
summary_length = st.slider("Summary Length", min_value=50, max_value=500, value=100)
# NOTE(review): "gpt-3.5-turbo-instruct" looks like an OpenAI API model name;
# presumably the transformers pipeline cannot load it -- confirm and drop or
# replace this option. Until then a load failure is reported below.
summarization_model = st.selectbox(
    "Summarization Model",
    ["gpt-3.5-turbo-instruct", "t5-small", "facebook/bart-large-cnn"],
)

# File upload and processing
uploaded_file = st.file_uploader("Choose a PDF file")
if uploaded_file is not None:
    with st.spinner("Processing..."):
        # Keep only elements that actually produced text. Appending "\n" for
        # every element (text or not) would make `text` non-empty for any PDF
        # with at least one layout element, hiding the "no text" case.
        pieces = []
        for page_layout in extract_pages(uploaded_file):
            for element in page_layout:
                cleaned = preprocess_text(element)
                if cleaned.strip():
                    pieces.append(cleaned)
        text = "\n".join(pieces)

    if text:
        submit = st.button("Generate Summary")
        if submit:
            # st.spinner only shows while its `with` block is running.
            with st.spinner("Summarizing..."):
                try:
                    response = get_openai_response(
                        text, length=summary_length, model=summarization_model
                    )
                except (OSError, ValueError) as err:
                    # Show model-loading/summarization failures in the UI
                    # instead of a raw traceback.
                    st.error(f"Could not summarize with {summarization_model!r}: {err}")
                else:
                    st.subheader("Summary")
                    st.write(response[0]["summary_text"])
    else:
        st.error("No text found in the PDF.")