KingNish committed on
Commit
a007d1e
·
verified ·
1 Parent(s): a74d94b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -8
app.py CHANGED
@@ -27,12 +27,11 @@ def xml2text(xml):
27
 
28
  def clean_text(content):
29
  """Cleans text content based on the 'clean' parameter."""
30
- if clean:
31
- content = content.replace('\n', ' ')
32
- content = content.replace('\r', ' ')
33
- content = content.replace('\t', ' ')
34
- content = content.replace('  ', ' ') # Replace double spaces with single
35
- content = content.strip()
36
  return content
37
 
38
 
@@ -66,7 +65,7 @@ def extract_text_from_docx(docx_data, clean=True):
66
  text += xml2text(zipf.read(fname))
67
 
68
  zipf.close()
69
- if clean
70
  text = clean_text(text)
71
  return text, len(text)
72
 
@@ -100,7 +99,7 @@ def read_document(file, clean=True):
100
  for cell in row:
101
  if cell.value is not None:
102
  content += str(cell.value) + ' '
103
- if clean
104
  content = clean_text(content)
105
  return content, len(content)
106
  except Exception as e:
 
27
 
28
  def clean_text(content):
29
  """Cleans text content based on the 'clean' parameter."""
30
+ content = content.replace('\n', ' ')
31
+ content = content.replace('\r', ' ')
32
+ content = content.replace('\t', ' ')
33
+ content = content.replace('  ', ' ') # Replace double spaces with single
34
+ content = content.strip()
 
35
  return content
36
 
37
 
 
65
  text += xml2text(zipf.read(fname))
66
 
67
  zipf.close()
68
+ if clean:
69
  text = clean_text(text)
70
  return text, len(text)
71
 
 
99
  for cell in row:
100
  if cell.value is not None:
101
  content += str(cell.value) + ' '
102
+ if clean:
103
  content = clean_text(content)
104
  return content, len(content)
105
  except Exception as e: