Update app.py
app.py
CHANGED
@@ -4,32 +4,54 @@ from transformers import pipeline
import streamlit as st

-
-
-    return summarizer(df)
-
-
-
-
-#
-st.
-
+def preprocess_text(element):
+    # Extract text content
+    text = element.get_text().strip()
+
+    # Remove non-textual elements
+    text = re.sub(r'[^\w\s]', '', text)  # Replace with your preferred regular expression
+
+    # Remove stop words (optional)
+    # from nltk.corpus import stopwords
+    # stop_words = set(stopwords.words('english'))
+    # text = " ".join([word for word in text.split() if word not in stop_words])
+
+    # Convert to lowercase (optional)
+    # text = text.lower()
+
+    return text
+
+def get_openai_response(text, length=100, model="gpt-3.5-turbo-instruct"):
+    summarizer = pipeline("summarization", model=model)
+    return summarizer(text, max_length=length)
+
+## Streamlit app
+st.set_page_config(page_title="Trail Demo")
+st.header("PDF Summarizer")
+
+# User options
+st.subheader("Settings")
+summary_length = st.slider("Summary Length", min_value=50, max_value=500, value=100)
+summarization_model = st.selectbox("Summarization Model", ["gpt-3.5-turbo-instruct", "t5-small"])
+
+# File upload and processing
+uploaded_file = st.file_uploader("Choose a PDF file")
if uploaded_file is not None:
-
-
-
-
-
-    submit = st.button("Generate")
-    if submit:
-        st.subheader("The response is")
-        st.write(response)
+    with st.spinner("Processing..."):
+        text = ""
+        for page_layout in extract_pages(uploaded_file):
+            for element in page_layout:
+                text += preprocess_text(element) + "\n"
+    if text:
+        st.subheader("Extracted Text")
+        st.write(text)
+        submit = st.button("Generate Summary")
+        if submit:
+            st.spinner("Summarizing...")
+            response = get_openai_response(text, length=summary_length, model=summarization_model)
+            st.subheader("Summary")
+            st.write(response[0]["summary_text"])
+    else:
+        st.error("No text found in the PDF.")
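Note on the new extraction loop: it passes every layout object yielded by extract_pages() straight into preprocess_text(), which calls element.get_text(). Assuming extract_pages is pdfminer.six's pdfminer.high_level.extract_pages (its import sits above this hunk, so that is an assumption), only LTTextContainer objects expose get_text(), while pages also yield figures, images, and rectangles. A minimal defensive sketch of that loop, with extract_pdf_text as a hypothetical helper name not present in this commit:

# Sketch only: skip layout objects that carry no text (assumes pdfminer.six).
from pdfminer.high_level import extract_pages
from pdfminer.layout import LTTextContainer

def extract_pdf_text(pdf_file):
    # Concatenate the text of every text container on every page.
    text = ""
    for page_layout in extract_pages(pdf_file):
        for element in page_layout:
            if isinstance(element, LTTextContainer):  # images, rects, etc. have no get_text()
                text += element.get_text().strip() + "\n"
    return text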
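Note on get_openai_response(): it builds a transformers summarization pipeline from the model chosen in the selectbox. Of the two options, t5-small is a Hugging Face Hub checkpoint the summarization pipeline can load, while gpt-3.5-turbo-instruct is an OpenAI API model and would not normally load through pipeline(). A hedged sketch of the call with t5-small, cached via st.cache_resource so Streamlit reruns reuse the model (load_summarizer and summarize are hypothetical helpers, not part of this commit):

import streamlit as st
from transformers import pipeline

@st.cache_resource
def load_summarizer(model_name="t5-small"):
    # Build the summarization pipeline once and reuse it across reruns.
    return pipeline("summarization", model=model_name)

def summarize(text, length=100):
    summarizer = load_summarizer()
    # truncation=True keeps long PDF text within the model's input limit.
    result = summarizer(text, max_length=length, min_length=min(30, length), truncation=True)
    return result[0]["summary_text"]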