Commit
·
a11ffc5
1
Parent(s):
5461e28
Fix json file parse (#4004)
Browse files
### What problem does this PR solve?
Fix JSON file parsing.
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
---------
Signed-off-by: jinhai <[email protected]>
deepdoc/parser/json_parser.py
CHANGED
@@ -4,6 +4,7 @@
|
|
4 |
|
5 |
import json
|
6 |
from typing import Any
|
|
|
7 |
from rag.nlp import find_codec
|
8 |
class RAGFlowJsonParser:
|
9 |
def __init__(
|
@@ -53,7 +54,7 @@ class RAGFlowJsonParser:
|
|
53 |
|
54 |
def _json_split(
|
55 |
self,
|
56 |
-
data
|
57 |
current_path: list[str] | None,
|
58 |
chunks: list[dict] | None,
|
59 |
) -> list[dict]:
|
@@ -86,13 +87,14 @@ class RAGFlowJsonParser:
|
|
86 |
|
87 |
def split_json(
|
88 |
self,
|
89 |
-
json_data
|
90 |
convert_lists: bool = False,
|
91 |
) -> list[dict]:
|
92 |
"""Splits JSON into a list of JSON chunks"""
|
93 |
|
94 |
if convert_lists:
|
95 |
-
|
|
|
96 |
else:
|
97 |
chunks = self._json_split(json_data, None, None)
|
98 |
|
|
|
4 |
|
5 |
import json
|
6 |
from typing import Any
|
7 |
+
|
8 |
from rag.nlp import find_codec
|
9 |
class RAGFlowJsonParser:
|
10 |
def __init__(
|
|
|
54 |
|
55 |
def _json_split(
|
56 |
self,
|
57 |
+
data,
|
58 |
current_path: list[str] | None,
|
59 |
chunks: list[dict] | None,
|
60 |
) -> list[dict]:
|
|
|
87 |
|
88 |
def split_json(
|
89 |
self,
|
90 |
+
json_data,
|
91 |
convert_lists: bool = False,
|
92 |
) -> list[dict]:
|
93 |
"""Splits JSON into a list of JSON chunks"""
|
94 |
|
95 |
if convert_lists:
|
96 |
+
preprocessed_data = self._list_to_dict_preprocessing(json_data)
|
97 |
+
chunks = self._json_split(preprocessed_data, None, None)
|
98 |
else:
|
99 |
chunks = self._json_split(json_data, None, None)
|
100 |
|