KKowenn committed on
Commit
b0e3283
·
verified ·
1 Parent(s): dc6c06e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -178,7 +178,7 @@ def extract_text_tables_pdfplumber(pdf_file):
178
  print("No text extracted. The PDF might be image-based.")
179
  return None, None
180
 
181
- def split_text_into_chunks(text, tokenizer, max_tokens=256):
182
  sentences = nltk.sent_tokenize(text)
183
  chunks = []
184
  current_chunk = ''
@@ -331,7 +331,7 @@ else:
331
  st.write(f"Original text length: {input_length} words")
332
 
333
  # Define the maximum number of tokens the model can handle
334
- max_input_tokens = 256
335
 
336
  # Function to split text into chunks based on tokens (modified to avoid overlaps)
337
  def split_text_into_chunks(text, tokenizer, max_tokens=max_input_tokens):
 
178
  print("No text extracted. The PDF might be image-based.")
179
  return None, None
180
 
181
+ def split_text_into_chunks(text, tokenizer, max_tokens=512):
182
  sentences = nltk.sent_tokenize(text)
183
  chunks = []
184
  current_chunk = ''
 
331
  st.write(f"Original text length: {input_length} words")
332
 
333
  # Define the maximum number of tokens the model can handle
334
+ max_input_tokens = 512
335
 
336
  # Function to split text into chunks based on tokens (modified to avoid overlaps)
337
  def split_text_into_chunks(text, tokenizer, max_tokens=max_input_tokens):