Kevin Hu committed on
Commit
d453e49
·
1 Parent(s): 6c993fc

Make infinity adapt (#4635)

Browse files

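### What problem does this PR solve?

Adapts tag handling to the Infinity document store: `tag_feas` is now mapped as a `varchar` (default `""`) instead of an `integer`, and fields ending in `_feas` are JSON-serialized on insert and update. `tag_kwd` joins the list fields stored as `###`-joined strings (on insert, update, and read). The dense-match filter is applied only when a full-text filter is present, and the GraphRAG chunk insert now passes `kb_id`.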

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

agent/templates/research_report.json CHANGED
The diff for this file is too large to render. See raw diff
 
conf/infinity_mapping.json CHANGED
@@ -30,7 +30,7 @@
30
  "knowledge_graph_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
31
  "entities_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
32
  "pagerank_fea": {"type": "integer", "default": 0},
33
- "tag_feas": {"type": "integer", "default": 0},
34
 
35
  "important_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
36
  "from_entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
 
30
  "knowledge_graph_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
31
  "entities_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
32
  "pagerank_fea": {"type": "integer", "default": 0},
33
+ "tag_feas": {"type": "varchar", "default": ""},
34
 
35
  "important_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
36
  "from_entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace"},
graphrag/utils.py CHANGED
@@ -484,7 +484,7 @@ def update_nodes_pagerank_nhop_neighbour(tenant_id, kb_id, graph, n_hop):
484
  chunk,
485
  search.index_name(tenant_id), kb_id)
486
  else:
487
- settings.docStoreConn.insert([{"id": chunk_id(chunk), **chunk}], search.index_name(tenant_id))
488
 
489
 
490
  def get_entity_type2sampels(idxnms, kb_ids: list):
 
484
  chunk,
485
  search.index_name(tenant_id), kb_id)
486
  else:
487
+ settings.docStoreConn.insert([{"id": chunk_id(chunk), **chunk}], search.index_name(tenant_id), kb_id)
488
 
489
 
490
  def get_entity_type2sampels(idxnms, kb_ids: list):
rag/utils/infinity_conn.py CHANGED
@@ -299,7 +299,7 @@ class InfinityConnection(DocStoreConnection):
299
  matchExpr.extra_options[k] = str(v)
300
  logger.debug(f"INFINITY search MatchTextExpr: {json.dumps(matchExpr.__dict__)}")
301
  elif isinstance(matchExpr, MatchDenseExpr):
302
- if filter_cond and "filter" not in matchExpr.extra_options:
303
  matchExpr.extra_options.update({"filter": filter_fulltext})
304
  for k, v in matchExpr.extra_options.items():
305
  if not isinstance(v, str):
@@ -424,9 +424,11 @@ class InfinityConnection(DocStoreConnection):
424
  assert "_id" not in d
425
  assert "id" in d
426
  for k, v in d.items():
427
- if k in ["important_kwd", "question_kwd", "entities_kwd"]:
428
  assert isinstance(v, list)
429
  d[k] = "###".join(v)
 
 
430
  elif k == 'kb_id':
431
  if isinstance(d[k], list):
432
  d[k] = d[k][0] # since d[k] is a list, but we need a str
@@ -462,7 +464,12 @@ class InfinityConnection(DocStoreConnection):
462
  del condition["exist"]
463
  filter = equivalent_condition_to_str(condition)
464
  for k, v in list(newValue.items()):
465
- if k.endswith("_kwd") and isinstance(v, list):
 
 
 
 
 
466
  newValue[k] = " ".join(v)
467
  elif k == 'kb_id':
468
  if isinstance(newValue[k], list):
@@ -531,7 +538,7 @@ class InfinityConnection(DocStoreConnection):
531
  v = res[fieldnm][i]
532
  if isinstance(v, Series):
533
  v = list(v)
534
- elif fieldnm in ["important_kwd", "question_kwd", "entities_kwd"]:
535
  assert isinstance(v, str)
536
  v = [kwd for kwd in v.split("###") if kwd]
537
  elif fieldnm == "position_int":
 
299
  matchExpr.extra_options[k] = str(v)
300
  logger.debug(f"INFINITY search MatchTextExpr: {json.dumps(matchExpr.__dict__)}")
301
  elif isinstance(matchExpr, MatchDenseExpr):
302
+ if filter_fulltext and filter_cond and "filter" not in matchExpr.extra_options:
303
  matchExpr.extra_options.update({"filter": filter_fulltext})
304
  for k, v in matchExpr.extra_options.items():
305
  if not isinstance(v, str):
 
424
  assert "_id" not in d
425
  assert "id" in d
426
  for k, v in d.items():
427
+ if k in ["important_kwd", "question_kwd", "entities_kwd", "tag_kwd"]:
428
  assert isinstance(v, list)
429
  d[k] = "###".join(v)
430
+ elif re.search(r"_feas$", k):
431
+ d[k] = json.dumps(v)
432
  elif k == 'kb_id':
433
  if isinstance(d[k], list):
434
  d[k] = d[k][0] # since d[k] is a list, but we need a str
 
464
  del condition["exist"]
465
  filter = equivalent_condition_to_str(condition)
466
  for k, v in list(newValue.items()):
467
+ if k in ["important_kwd", "question_kwd", "entities_kwd", "tag_kwd"]:
468
+ assert isinstance(v, list)
469
+ newValue[k] = "###".join(v)
470
+ elif re.search(r"_feas$", k):
471
+ newValue[k] = json.dumps(v)
472
+ elif k.endswith("_kwd") and isinstance(v, list):
473
  newValue[k] = " ".join(v)
474
  elif k == 'kb_id':
475
  if isinstance(newValue[k], list):
 
538
  v = res[fieldnm][i]
539
  if isinstance(v, Series):
540
  v = list(v)
541
+ elif fieldnm in ["important_kwd", "question_kwd", "entities_kwd", "tag_kwd"]:
542
  assert isinstance(v, str)
543
  v = [kwd for kwd in v.split("###") if kwd]
544
  elif fieldnm == "position_int":