Kevin Hu
committed on
Commit
·
58ecd6d
1
Parent(s):
cf493b7
make excel parsing configurable (#2517)
Browse files
### What problem does this PR solve?
#2516
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- rag/app/naive.py +4 -1
- rag/llm/chat_model.py +1 -0
rag/app/naive.py
CHANGED
@@ -221,7 +221,10 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|
221 |
elif re.search(r"\.xlsx?$", filename, re.IGNORECASE):
|
222 |
callback(0.1, "Start to parse.")
|
223 |
excel_parser = ExcelParser()
|
224 |
-
|
|
|
|
|
|
|
225 |
|
226 |
elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE):
|
227 |
callback(0.1, "Start to parse.")
|
|
|
221 |
elif re.search(r"\.xlsx?$", filename, re.IGNORECASE):
|
222 |
callback(0.1, "Start to parse.")
|
223 |
excel_parser = ExcelParser()
|
224 |
+
if parser_config.get("html4excel"):
|
225 |
+
sections = [(l, "") for l in excel_parser.html(binary, 12) if l]
|
226 |
+
else:
|
227 |
+
sections = [(l, "") for l in excel_parser(binary) if l]
|
228 |
|
229 |
elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE):
|
230 |
callback(0.1, "Start to parse.")
|
rag/llm/chat_model.py
CHANGED
@@ -689,6 +689,7 @@ class BedrockChat(Base):
|
|
689 |
|
690 |
yield num_tokens_from_string(ans)
|
691 |
|
|
|
692 |
class GeminiChat(Base):
|
693 |
|
694 |
def __init__(self, key, model_name,base_url=None):
|
|
|
689 |
|
690 |
yield num_tokens_from_string(ans)
|
691 |
|
692 |
+
|
693 |
class GeminiChat(Base):
|
694 |
|
695 |
def __init__(self, key, model_name,base_url=None):
|