Kevin Hu committed on
Commit
58ecd6d
·
1 Parent(s): cf493b7

make excel parsing configurable (#2517)

Browse files

### What problem does this PR solve?

#2516

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Files changed (2) hide show
  1. rag/app/naive.py +4 -1
  2. rag/llm/chat_model.py +1 -0
rag/app/naive.py CHANGED
@@ -221,7 +221,10 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
221
  elif re.search(r"\.xlsx?$", filename, re.IGNORECASE):
222
  callback(0.1, "Start to parse.")
223
  excel_parser = ExcelParser()
224
- sections = [(l, "") for l in excel_parser.html(binary) if l]
 
 
 
225
 
226
  elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE):
227
  callback(0.1, "Start to parse.")
 
221
  elif re.search(r"\.xlsx?$", filename, re.IGNORECASE):
222
  callback(0.1, "Start to parse.")
223
  excel_parser = ExcelParser()
224
+ if parser_config.get("html4excel"):
225
+ sections = [(l, "") for l in excel_parser.html(binary, 12) if l]
226
+ else:
227
+ sections = [(l, "") for l in excel_parser(binary) if l]
228
 
229
  elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE):
230
  callback(0.1, "Start to parse.")
rag/llm/chat_model.py CHANGED
@@ -689,6 +689,7 @@ class BedrockChat(Base):
689
 
690
  yield num_tokens_from_string(ans)
691
 
 
692
  class GeminiChat(Base):
693
 
694
  def __init__(self, key, model_name,base_url=None):
 
689
 
690
  yield num_tokens_from_string(ans)
691
 
692
+
693
  class GeminiChat(Base):
694
 
695
  def __init__(self, key, model_name,base_url=None):