Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,21 +6,21 @@ from transformers import pipeline
|
|
6 |
import streamlit as st
|
7 |
|
8 |
def preprocess_text(element):
|
9 |
-
#
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
|
25 |
def get_openai_response(text, length=100, model="gpt-3.5-turbo-instruct"):
|
26 |
summarizer = pipeline("summarization", model=model)
|
|
|
6 |
import streamlit as st
|
7 |
|
8 |
def preprocess_text(element):
    """Return the cleaned text of a pdfminer horizontal text box.

    Args:
        element: A layout object. Only instances of
            ``pdfminer.layout.LTTextBoxHorizontal`` produce text.

    Returns:
        The element's text with surrounding whitespace stripped and every
        character that is neither a word character nor whitespace removed.
        An empty string for any other kind of element.
    """
    # Guard clause: non-text elements contribute an empty string.
    if not isinstance(element, pdfminer.layout.LTTextBoxHorizontal):
        return ""

    text = element.get_text().strip()

    # Drop everything except word characters and whitespace (punctuation,
    # symbols). Replace the pattern with your preferred regular expression.
    text = re.sub(r"[^\w\s]", "", text)

    # Optional further steps, intentionally left disabled:
    #   - remove stop words, e.g. with nltk.corpus.stopwords.words('english')
    #   - convert to lowercase with text.lower()
    return text
|
24 |
|
25 |
def get_openai_response(text, length=100, model="gpt-3.5-turbo-instruct"):
|
26 |
summarizer = pipeline("summarization", model=model)
|