Spaces:

adinarayana
/

Sample

Sleeping

adinarayana committed on Feb 14, 2024

Commit

eb9dd95

verified ·

1 Parent(s): cd78587

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,21 +6,21 @@ from transformers import pipeline
 import streamlit as st
 def preprocess_text(element):
-    # Extract text content
-    text = element.get_text().strip()
-    # Remove non-textual elements
-    text = re.sub(r'[^\w\s]', '', text)  # Replace with your preferred regular expression
-    # Remove stop words (optional)
-    # from nltk.corpus import stopwords
-    # stop_words = set(stopwords.words('english'))
-    # text = " ".join([word for word in text.split() if word not in stop_words])
-    # Convert to lowercase (optional)
-    # text = text.lower()
-    return text
 def get_openai_response(text, length=100, model="gpt-3.5-turbo-instruct"):
     summarizer = pipeline("summarization", model=model)

 import streamlit as st
 def preprocess_text(element):
+    if isinstance(element, pdfminer.layout.LTTextBoxHorizontal):  # Check for text elements
+        text = element.get_text().strip()
+        # Remove non-textual elements
+        text = re.sub(r'[^\w\s]', '', text)  # Replace with your preferred regular expression
+        # Remove stop words (optional)
+        # from nltk.corpus import stopwords
+        # stop_words = set(stopwords.words('english'))
+        # text = " ".join([word for word in text.split() if word not in stop_words])
+        # Convert to lowercase (optional)
+        # text = text.lower()
+        return text
+    else:
+        return ""
 def get_openai_response(text, length=100, model="gpt-3.5-turbo-instruct"):
     summarizer = pipeline("summarization", model=model)