Kevin Hu
committed on
Commit
·
d453e49
1
Parent(s):
6c993fc
Make infinity adapt (#4635)
Browse files
### What problem does this PR solve?
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- agent/templates/research_report.json +0 -0
- conf/infinity_mapping.json +1 -1
- graphrag/utils.py +1 -1
- rag/utils/infinity_conn.py +11 -4
agent/templates/research_report.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
conf/infinity_mapping.json
CHANGED
@@ -30,7 +30,7 @@
|
|
30 |
"knowledge_graph_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
|
31 |
"entities_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
|
32 |
"pagerank_fea": {"type": "integer", "default": 0},
|
33 |
-
"tag_feas": {"type": "
|
34 |
|
35 |
"important_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
|
36 |
"from_entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
|
|
|
30 |
"knowledge_graph_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
|
31 |
"entities_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
|
32 |
"pagerank_fea": {"type": "integer", "default": 0},
|
33 |
+
"tag_feas": {"type": "varchar", "default": ""},
|
34 |
|
35 |
"important_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
|
36 |
"from_entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
|
graphrag/utils.py
CHANGED
@@ -484,7 +484,7 @@ def update_nodes_pagerank_nhop_neighbour(tenant_id, kb_id, graph, n_hop):
|
|
484 |
chunk,
|
485 |
search.index_name(tenant_id), kb_id)
|
486 |
else:
|
487 |
-
settings.docStoreConn.insert([{"id": chunk_id(chunk), **chunk}], search.index_name(tenant_id))
|
488 |
|
489 |
|
490 |
def get_entity_type2sampels(idxnms, kb_ids: list):
|
|
|
484 |
chunk,
|
485 |
search.index_name(tenant_id), kb_id)
|
486 |
else:
|
487 |
+
settings.docStoreConn.insert([{"id": chunk_id(chunk), **chunk}], search.index_name(tenant_id), kb_id)
|
488 |
|
489 |
|
490 |
def get_entity_type2sampels(idxnms, kb_ids: list):
|
rag/utils/infinity_conn.py
CHANGED
@@ -299,7 +299,7 @@ class InfinityConnection(DocStoreConnection):
|
|
299 |
matchExpr.extra_options[k] = str(v)
|
300 |
logger.debug(f"INFINITY search MatchTextExpr: {json.dumps(matchExpr.__dict__)}")
|
301 |
elif isinstance(matchExpr, MatchDenseExpr):
|
302 |
-
if filter_cond and "filter" not in matchExpr.extra_options:
|
303 |
matchExpr.extra_options.update({"filter": filter_fulltext})
|
304 |
for k, v in matchExpr.extra_options.items():
|
305 |
if not isinstance(v, str):
|
@@ -424,9 +424,11 @@ class InfinityConnection(DocStoreConnection):
|
|
424 |
assert "_id" not in d
|
425 |
assert "id" in d
|
426 |
for k, v in d.items():
|
427 |
-
if k in ["important_kwd", "question_kwd", "entities_kwd"]:
|
428 |
assert isinstance(v, list)
|
429 |
d[k] = "###".join(v)
|
|
|
|
|
430 |
elif k == 'kb_id':
|
431 |
if isinstance(d[k], list):
|
432 |
d[k] = d[k][0] # since d[k] is a list, but we need a str
|
@@ -462,7 +464,12 @@ class InfinityConnection(DocStoreConnection):
|
|
462 |
del condition["exist"]
|
463 |
filter = equivalent_condition_to_str(condition)
|
464 |
for k, v in list(newValue.items()):
|
465 |
-
if k.endswith("_kwd") and isinstance(v, list):
|
|
|
|
|
|
|
|
|
|
|
466 |
newValue[k] = " ".join(v)
|
467 |
elif k == 'kb_id':
|
468 |
if isinstance(newValue[k], list):
|
@@ -531,7 +538,7 @@ class InfinityConnection(DocStoreConnection):
|
|
531 |
v = res[fieldnm][i]
|
532 |
if isinstance(v, Series):
|
533 |
v = list(v)
|
534 |
-
elif fieldnm in ["important_kwd", "question_kwd", "entities_kwd"]:
|
535 |
assert isinstance(v, str)
|
536 |
v = [kwd for kwd in v.split("###") if kwd]
|
537 |
elif fieldnm == "position_int":
|
|
|
299 |
matchExpr.extra_options[k] = str(v)
|
300 |
logger.debug(f"INFINITY search MatchTextExpr: {json.dumps(matchExpr.__dict__)}")
|
301 |
elif isinstance(matchExpr, MatchDenseExpr):
|
302 |
+
if filter_fulltext and filter_cond and "filter" not in matchExpr.extra_options:
|
303 |
matchExpr.extra_options.update({"filter": filter_fulltext})
|
304 |
for k, v in matchExpr.extra_options.items():
|
305 |
if not isinstance(v, str):
|
|
|
424 |
assert "_id" not in d
|
425 |
assert "id" in d
|
426 |
for k, v in d.items():
|
427 |
+
if k in ["important_kwd", "question_kwd", "entities_kwd", "tag_kwd"]:
|
428 |
assert isinstance(v, list)
|
429 |
d[k] = "###".join(v)
|
430 |
+
elif re.search(r"_feas$", k):
|
431 |
+
d[k] = json.dumps(v)
|
432 |
elif k == 'kb_id':
|
433 |
if isinstance(d[k], list):
|
434 |
d[k] = d[k][0] # since d[k] is a list, but we need a str
|
|
|
464 |
del condition["exist"]
|
465 |
filter = equivalent_condition_to_str(condition)
|
466 |
for k, v in list(newValue.items()):
|
467 |
+
if k in ["important_kwd", "question_kwd", "entities_kwd", "tag_kwd"]:
|
468 |
+
assert isinstance(v, list)
|
469 |
+
newValue[k] = "###".join(v)
|
470 |
+
elif re.search(r"_feas$", k):
|
471 |
+
newValue[k] = json.dumps(v)
|
472 |
+
elif k.endswith("_kwd") and isinstance(v, list):
|
473 |
newValue[k] = " ".join(v)
|
474 |
elif k == 'kb_id':
|
475 |
if isinstance(newValue[k], list):
|
|
|
538 |
v = res[fieldnm][i]
|
539 |
if isinstance(v, Series):
|
540 |
v = list(v)
|
541 |
+
elif fieldnm in ["important_kwd", "question_kwd", "entities_kwd", "tag_kwd"]:
|
542 |
assert isinstance(v, str)
|
543 |
v = [kwd for kwd in v.split("###") if kwd]
|
544 |
elif fieldnm == "position_int":
|