Commit
·
43bceb7
1
Parent(s):
8446e15
Fix parsing JSON file error (#3829)
Browse files
### What problem does this PR solve?
Close issue: #3828
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
Signed-off-by: jinhai <[email protected]>
- deepdoc/parser/json_parser.py +2 -2
- rag/app/naive.py +2 -1
deepdoc/parser/json_parser.py
CHANGED
@@ -92,9 +92,9 @@ class RAGFlowJsonParser:
|
|
92 |
"""Splits JSON into a list of JSON chunks"""
|
93 |
|
94 |
if convert_lists:
|
95 |
-
chunks = self._json_split(self._list_to_dict_preprocessing(json_data))
|
96 |
else:
|
97 |
-
chunks = self._json_split(json_data)
|
98 |
|
99 |
# Remove the last chunk if it's empty
|
100 |
if not chunks[-1]:
|
|
|
92 |
"""Splits JSON into a list of JSON chunks"""
|
93 |
|
94 |
if convert_lists:
|
95 |
+
chunks = self._json_split(self._list_to_dict_preprocessing(json_data), None, None)
|
96 |
else:
|
97 |
+
chunks = self._json_split(json_data, None, None)
|
98 |
|
99 |
# Remove the last chunk if it's empty
|
100 |
if not chunks[-1]:
|
rag/app/naive.py
CHANGED
@@ -258,7 +258,8 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|
258 |
|
259 |
elif re.search(r"\.json$", filename, re.IGNORECASE):
|
260 |
callback(0.1, "Start to parse.")
|
261 |
-
|
|
|
262 |
sections = [(_, "") for _ in sections if _]
|
263 |
callback(0.8, "Finish parsing.")
|
264 |
|
|
|
258 |
|
259 |
elif re.search(r"\.json$", filename, re.IGNORECASE):
|
260 |
callback(0.1, "Start to parse.")
|
261 |
+
chunk_token_num = int(parser_config.get("chunk_token_num", 128))
|
262 |
+
sections = JsonParser(chunk_token_num)(binary)
|
263 |
sections = [(_, "") for _ in sections if _]
|
264 |
callback(0.8, "Finish parsing.")
|
265 |
|