adinarayana committed on
Commit
eb9dd95
·
verified ·
1 Parent(s): cd78587

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -15
app.py CHANGED
@@ -6,21 +6,21 @@ from transformers import pipeline
6
  import streamlit as st
7
 
8
  def preprocess_text(element):
9
- # Extract text content
10
- text = element.get_text().strip()
11
-
12
- # Remove non-textual elements
13
- text = re.sub(r'[^\w\s]', '', text) # Replace with your preferred regular expression
14
-
15
- # Remove stop words (optional)
16
- # from nltk.corpus import stopwords
17
- # stop_words = set(stopwords.words('english'))
18
- # text = " ".join([word for word in text.split() if word not in stop_words])
19
-
20
- # Convert to lowercase (optional)
21
- # text = text.lower()
22
-
23
- return text
24
 
25
  def get_openai_response(text, length=100, model="gpt-3.5-turbo-instruct"):
26
  summarizer = pipeline("summarization", model=model)
 
6
  import streamlit as st
7
 
8
  def preprocess_text(element):
9
+ if isinstance(element, pdfminer.layout.LTTextBoxHorizontal): # Check for text elements
10
+ text = element.get_text().strip()
11
+ # Remove non-textual elements
12
+ text = re.sub(r'[^\w\s]', '', text) # Replace with your preferred regular expression
13
+
14
+ # Remove stop words (optional)
15
+ # from nltk.corpus import stopwords
16
+ # stop_words = set(stopwords.words('english'))
17
+ # text = " ".join([word for word in text.split() if word not in stop_words])
18
+
19
+ # Convert to lowercase (optional)
20
+ # text = text.lower()
21
+ return text
22
+ else:
23
+ return "
24
 
25
  def get_openai_response(text, length=100, model="gpt-3.5-turbo-instruct"):
26
  summarizer = pipeline("summarization", model=model)