Spaces: retopara/ragflow

Build error

Kevin Hu committed on Aug 2, 2024

Commit

6054f54

1 Parent(s): 8d4e686

Add graphrag (#1793)

### What problem does this PR solve?

#1594

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50)

{graph → agent}/README.md +0 -0
{graph → agent}/README_zh.md +0 -0
{graph → agent}/__init__.py +0 -0
{graph → agent}/canvas.py +3 -3
{graph → agent}/component/__init__.py +0 -0
{graph → agent}/component/answer.py +1 -1
{graph → agent}/component/arxiv.py +2 -4
{graph → agent}/component/baidu.py +2 -2
{graph → agent}/component/base.py +2 -2
{graph → agent}/component/begin.py +2 -3
{graph → agent}/component/bing.py +2 -2
{graph → agent}/component/categorize.py +2 -5
{graph → agent}/component/cite.py +1 -1
{graph → agent}/component/duckduckgo.py +2 -4
{graph → agent}/component/generate.py +1 -3
{graph → agent}/component/google.py +2 -2
{graph → agent}/component/googlescholar.py +2 -2
{graph → agent}/component/keyword.py +2 -2
{graph → agent}/component/message.py +1 -4
{graph → agent}/component/pubmed.py +2 -4
{graph → agent}/component/relevant.py +1 -1
{graph → agent}/component/retrieval.py +1 -1
{graph → agent}/component/rewrite.py +1 -1
{graph → agent}/component/switch.py +1 -6
{graph → agent}/component/wikipedia.py +2 -2
{graph → agent}/settings.py +0 -0
{graph → agent}/templates/HR_callout_zh.json +0 -0
{graph → agent}/templates/customer_service.json +0 -0
{graph → agent}/templates/general_chat_bot.json +0 -0
{graph → agent}/templates/interpreter.json +0 -0
{graph → agent}/templates/websearch_assistant.json +0 -0
{graph → agent}/test/client.py +2 -3
{graph → agent}/test/dsl_examples/categorize.json +0 -0
{graph → agent}/test/dsl_examples/customer_service.json +0 -0
{graph → agent}/test/dsl_examples/headhunter_zh.json +0 -0
{graph → agent}/test/dsl_examples/intergreper.json +0 -0
{graph → agent}/test/dsl_examples/interpreter.json +0 -0
{graph → agent}/test/dsl_examples/keyword_wikipedia_and_generate.json +0 -0
{graph → agent}/test/dsl_examples/retrieval_and_generate.json +0 -0
{graph → agent}/test/dsl_examples/retrieval_categorize_and_generate.json +0 -0
{graph → agent}/test/dsl_examples/retrieval_relevant_and_generate.json +0 -0
{graph → agent}/test/dsl_examples/retrieval_relevant_keyword_baidu_and_generate.json +0 -0
{graph → agent}/test/dsl_examples/retrieval_relevant_rewrite_and_generate.json +0 -0
api/apps/api_app.py +1 -3
api/apps/canvas_app.py +1 -4
api/apps/chunk_app.py +36 -10
api/apps/dataset_api.py +2 -1
api/db/__init__.py +1 -0
api/db/init_data.py +3 -3
api/db/services/dialog_service.py +8 -5

{graph → agent}/README.md RENAMED Viewed

File without changes

{graph → agent}/README_zh.md RENAMED Viewed

File without changes

{graph → agent}/__init__.py RENAMED Viewed

File without changes

{graph → agent}/canvas.py RENAMED Viewed

@@ -22,9 +22,9 @@ from functools import partial
 import pandas as pd
-from graph.component import component_class
-from graph.component.base import ComponentBase
-from graph.settings import flow_logger, DEBUG
 class Canvas(ABC):

 import pandas as pd
+from agent.component import component_class
+from agent.component.base import ComponentBase
+from agent.settings import flow_logger, DEBUG
 class Canvas(ABC):

{graph → agent}/component/__init__.py RENAMED Viewed

File without changes

{graph → agent}/component/answer.py RENAMED Viewed

@@ -19,7 +19,7 @@ from functools import partial
 import pandas as pd
-from graph.component.base import ComponentBase, ComponentParamBase
 class AnswerParam(ComponentParamBase):

 import pandas as pd
+from agent.component.base import ComponentBase, ComponentParamBase
 class AnswerParam(ComponentParamBase):

{graph → agent}/component/arxiv.py RENAMED Viewed

@@ -13,13 +13,11 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
-import random
 from abc import ABC
-from functools import partial
 import arxiv
 import pandas as pd
-from graph.settings import DEBUG
-from graph.component.base import ComponentBase, ComponentParamBase
 class ArXivParam(ComponentParamBase):

 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
 from abc import ABC
 import arxiv
 import pandas as pd
+from agent.settings import DEBUG
+from agent.component.base import ComponentBase, ComponentParamBase
 class ArXivParam(ComponentParamBase):

{graph → agent}/component/baidu.py RENAMED Viewed

@@ -19,8 +19,8 @@ from functools import partial
 import pandas as pd
 import requests
 import re
-from graph.settings import DEBUG
-from graph.component.base import ComponentBase, ComponentParamBase
 class BaiduParam(ComponentParamBase):

 import pandas as pd
 import requests
 import re
+from agent.settings import DEBUG
+from agent.component.base import ComponentBase, ComponentParamBase
 class BaiduParam(ComponentParamBase):

{graph → agent}/component/base.py RENAMED Viewed

@@ -23,8 +23,8 @@ from typing import List, Dict, Tuple, Union
 import pandas as pd
-from graph import settings
-from graph.settings import flow_logger, DEBUG
 _FEEDED_DEPRECATED_PARAMS = "_feeded_deprecated_params"
 _DEPRECATED_PARAMS = "_deprecated_params"

 import pandas as pd
+from agent import settings
+from agent.settings import flow_logger, DEBUG
 _FEEDED_DEPRECATED_PARAMS = "_feeded_deprecated_params"
 _DEPRECATED_PARAMS = "_deprecated_params"

{graph → agent}/component/begin.py RENAMED Viewed

@@ -13,11 +13,10 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
-import json
 from functools import partial
 import pandas as pd
-from graph.component.base import ComponentBase, ComponentParamBase
 class BeginParam(ComponentParamBase):

 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
 from functools import partial
 import pandas as pd
+from agent.component.base import ComponentBase, ComponentParamBase
 class BeginParam(ComponentParamBase):

{graph → agent}/component/bing.py RENAMED Viewed

@@ -16,8 +16,8 @@
 from abc import ABC
 import requests
 import pandas as pd
-from graph.settings import DEBUG
-from graph.component.base import ComponentBase, ComponentParamBase
 class BingParam(ComponentParamBase):

 from abc import ABC
 import requests
 import pandas as pd
+from agent.settings import DEBUG
+from agent.component.base import ComponentBase, ComponentParamBase
 class BingParam(ComponentParamBase):

{graph → agent}/component/categorize.py RENAMED Viewed

@@ -14,13 +14,10 @@
 #  limitations under the License.
 #
 from abc import ABC
-import pandas as pd
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
-from graph.component import GenerateParam, Generate
-from graph.settings import DEBUG
 class CategorizeParam(GenerateParam):

 #  limitations under the License.
 #
 from abc import ABC
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
+from agent.component import GenerateParam, Generate
+from agent.settings import DEBUG
 class CategorizeParam(GenerateParam):

{graph → agent}/component/cite.py RENAMED Viewed

@@ -21,7 +21,7 @@ from api.db import LLMType
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import LLMBundle
 from api.settings import retrievaler
-from graph.component.base import ComponentBase, ComponentParamBase
 class CiteParam(ComponentParamBase):

 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import LLMBundle
 from api.settings import retrievaler
+from agent.component.base import ComponentBase, ComponentParamBase
 class CiteParam(ComponentParamBase):

{graph → agent}/component/duckduckgo.py RENAMED Viewed

@@ -13,13 +13,11 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
-import random
 from abc import ABC
-from functools import partial
 from duckduckgo_search import DDGS
 import pandas as pd
-from graph.settings import DEBUG
-from graph.component.base import ComponentBase, ComponentParamBase
 class DuckDuckGoParam(ComponentParamBase):

 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
 from abc import ABC
 from duckduckgo_search import DDGS
 import pandas as pd
+from agent.settings import DEBUG
+from agent.component.base import ComponentBase, ComponentParamBase
 class DuckDuckGoParam(ComponentParamBase):

{graph → agent}/component/generate.py RENAMED Viewed

@@ -15,13 +15,11 @@
 #
 import re
 from functools import partial
 import pandas as pd
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
 from api.settings import retrievaler
-from graph.component.base import ComponentBase, ComponentParamBase
 class GenerateParam(ComponentParamBase):

 #
 import re
 from functools import partial
 import pandas as pd
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
 from api.settings import retrievaler
+from agent.component.base import ComponentBase, ComponentParamBase
 class GenerateParam(ComponentParamBase):

{graph → agent}/component/google.py RENAMED Viewed

@@ -16,8 +16,8 @@
 from abc import ABC
 from serpapi import GoogleSearch
 import pandas as pd
-from graph.settings import DEBUG
-from graph.component.base import ComponentBase, ComponentParamBase
 class GoogleParam(ComponentParamBase):

 from abc import ABC
 from serpapi import GoogleSearch
 import pandas as pd
+from agent.settings import DEBUG
+from agent.component.base import ComponentBase, ComponentParamBase
 class GoogleParam(ComponentParamBase):

{graph → agent}/component/googlescholar.py RENAMED Viewed

@@ -15,8 +15,8 @@
 #
 from abc import ABC
 import pandas as pd
-from graph.settings import DEBUG
-from graph.component.base import ComponentBase, ComponentParamBase
 from scholarly import scholarly

 #
 from abc import ABC
 import pandas as pd
+from agent.settings import DEBUG
+from agent.component.base import ComponentBase, ComponentParamBase
 from scholarly import scholarly

{graph → agent}/component/keyword.py RENAMED Viewed

@@ -17,8 +17,8 @@ import re
 from abc import ABC
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
-from graph.component import GenerateParam, Generate
-from graph.settings import DEBUG
 class KeywordExtractParam(GenerateParam):

 from abc import ABC
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
+from agent.component import GenerateParam, Generate
+from agent.settings import DEBUG
 class KeywordExtractParam(GenerateParam):

{graph → agent}/component/message.py RENAMED Viewed

@@ -16,10 +16,7 @@
 import random
 from abc import ABC
 from functools import partial
-import pandas as pd
-from graph.component.base import ComponentBase, ComponentParamBase
 class MessageParam(ComponentParamBase):

 import random
 from abc import ABC
 from functools import partial
+from agent.component.base import ComponentBase, ComponentParamBase
 class MessageParam(ComponentParamBase):

{graph → agent}/component/pubmed.py RENAMED Viewed

@@ -13,14 +13,12 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
-import random
 from abc import ABC
-from functools import partial
 from Bio import Entrez
 import pandas as pd
 import xml.etree.ElementTree as ET
-from graph.settings import DEBUG
-from graph.component.base import ComponentBase, ComponentParamBase
 class PubMedParam(ComponentParamBase):

 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
 from abc import ABC
 from Bio import Entrez
 import pandas as pd
 import xml.etree.ElementTree as ET
+from agent.settings import DEBUG
+from agent.component.base import ComponentBase, ComponentParamBase
 class PubMedParam(ComponentParamBase):

{graph → agent}/component/relevant.py RENAMED Viewed

@@ -16,7 +16,7 @@
 from abc import ABC
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
-from graph.component import GenerateParam, Generate
 from rag.utils import num_tokens_from_string, encoder

 from abc import ABC
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
+from agent.component import GenerateParam, Generate
 from rag.utils import num_tokens_from_string, encoder

{graph → agent}/component/retrieval.py RENAMED Viewed

@@ -21,7 +21,7 @@ from api.db import LLMType
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import LLMBundle
 from api.settings import retrievaler
-from graph.component.base import ComponentBase, ComponentParamBase
 class RetrievalParam(ComponentParamBase):

 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import LLMBundle
 from api.settings import retrievaler
+from agent.component.base import ComponentBase, ComponentParamBase
 class RetrievalParam(ComponentParamBase):

{graph → agent}/component/rewrite.py RENAMED Viewed

@@ -16,7 +16,7 @@
 from abc import ABC
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
-from graph.component import GenerateParam, Generate
 class RewriteQuestionParam(GenerateParam):

 from abc import ABC
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
+from agent.component import GenerateParam, Generate
 class RewriteQuestionParam(GenerateParam):

{graph → agent}/component/switch.py RENAMED Viewed

@@ -16,12 +16,7 @@
 from abc import ABC
 import pandas as pd
-from api.db import LLMType
-from api.db.services.knowledgebase_service import KnowledgebaseService
-from api.db.services.llm_service import LLMBundle
-from api.settings import retrievaler
-from graph.component.base import ComponentBase, ComponentParamBase
 class SwitchParam(ComponentParamBase):

 from abc import ABC
 import pandas as pd
+from agent.component.base import ComponentBase, ComponentParamBase
 class SwitchParam(ComponentParamBase):

{graph → agent}/component/wikipedia.py RENAMED Viewed

@@ -18,8 +18,8 @@ from abc import ABC
 from functools import partial
 import wikipedia
 import pandas as pd
-from graph.settings import DEBUG
-from graph.component.base import ComponentBase, ComponentParamBase
 class WikipediaParam(ComponentParamBase):

 from functools import partial
 import wikipedia
 import pandas as pd
+from agent.settings import DEBUG
+from agent.component.base import ComponentBase, ComponentParamBase
 class WikipediaParam(ComponentParamBase):

{graph → agent}/settings.py RENAMED Viewed

File without changes

{graph → agent}/templates/HR_callout_zh.json RENAMED Viewed

File without changes

{graph → agent}/templates/customer_service.json RENAMED Viewed

File without changes

{graph → agent}/templates/general_chat_bot.json RENAMED Viewed

File without changes

{graph → agent}/templates/interpreter.json RENAMED Viewed

File without changes

{graph → agent}/templates/websearch_assistant.json RENAMED Viewed

File without changes

{graph → agent}/test/client.py RENAMED Viewed

@@ -16,9 +16,8 @@
 import argparse
 import os
 from functools import partial
-import readline
-from graph.canvas import Canvas
-from graph.settings import DEBUG
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()

 import argparse
 import os
 from functools import partial
+from agent.canvas import Canvas
+from agent.settings import DEBUG
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()

{graph → agent}/test/dsl_examples/categorize.json RENAMED Viewed

File without changes

{graph → agent}/test/dsl_examples/customer_service.json RENAMED Viewed

File without changes

{graph → agent}/test/dsl_examples/headhunter_zh.json RENAMED Viewed

File without changes

{graph → agent}/test/dsl_examples/intergreper.json RENAMED Viewed

File without changes

{graph → agent}/test/dsl_examples/interpreter.json RENAMED Viewed

File without changes

{graph → agent}/test/dsl_examples/keyword_wikipedia_and_generate.json RENAMED Viewed

File without changes

{graph → agent}/test/dsl_examples/retrieval_and_generate.json RENAMED Viewed

File without changes

{graph → agent}/test/dsl_examples/retrieval_categorize_and_generate.json RENAMED Viewed

File without changes

{graph → agent}/test/dsl_examples/retrieval_relevant_and_generate.json RENAMED Viewed

File without changes

{graph → agent}/test/dsl_examples/retrieval_relevant_keyword_baidu_and_generate.json RENAMED Viewed

File without changes

{graph → agent}/test/dsl_examples/retrieval_relevant_rewrite_and_generate.json RENAMED Viewed

File without changes

api/apps/api_app.py CHANGED Viewed

@@ -20,7 +20,7 @@ from datetime import datetime, timedelta
 from flask import request, Response
 from flask_login import login_required, current_user
-from api.db import FileType, ParserType, FileSource, LLMType
 from api.db.db_models import APIToken, API4Conversation, Task, File
 from api.db.services import duplicate_name
 from api.db.services.api_service import APITokenService, API4ConversationService
@@ -29,7 +29,6 @@ from api.db.services.document_service import DocumentService
 from api.db.services.file2document_service import File2DocumentService
 from api.db.services.file_service import FileService
 from api.db.services.knowledgebase_service import KnowledgebaseService
-from api.db.services.llm_service import TenantLLMService
 from api.db.services.task_service import queue_tasks, TaskService
 from api.db.services.user_service import UserTenantService
 from api.settings import RetCode, retrievaler
@@ -38,7 +37,6 @@ from api.utils.api_utils import server_error_response, get_data_error_result, ge
 from itsdangerous import URLSafeTimedSerializer
 from api.utils.file_utils import filename_type, thumbnail
-from rag.nlp import keyword_extraction
 from rag.utils.minio_conn import MINIO

 from flask import request, Response
 from flask_login import login_required, current_user
+from api.db import FileType, ParserType, FileSource
 from api.db.db_models import APIToken, API4Conversation, Task, File
 from api.db.services import duplicate_name
 from api.db.services.api_service import APITokenService, API4ConversationService
 from api.db.services.file2document_service import File2DocumentService
 from api.db.services.file_service import FileService
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.task_service import queue_tasks, TaskService
 from api.db.services.user_service import UserTenantService
 from api.settings import RetCode, retrievaler
 from itsdangerous import URLSafeTimedSerializer
 from api.utils.file_utils import filename_type, thumbnail
 from rag.utils.minio_conn import MINIO

api/apps/canvas_app.py CHANGED Viewed

@@ -15,15 +15,12 @@
 #
 import json
 from functools import partial
 from flask import request, Response
 from flask_login import login_required, current_user
-from api.db.db_models import UserCanvas
 from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService
 from api.utils import get_uuid
 from api.utils.api_utils import get_json_result, server_error_response, validate_request
-from graph.canvas import Canvas
 @manager.route('/templates', methods=['GET'])

 #
 import json
 from functools import partial
 from flask import request, Response
 from flask_login import login_required, current_user
 from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService
 from api.utils import get_uuid
 from api.utils.api_utils import get_json_result, server_error_response, validate_request
+from agent.canvas import Canvas
 @manager.route('/templates', methods=['GET'])

api/apps/chunk_app.py CHANGED Viewed

@@ -14,6 +14,8 @@
 #  limitations under the License.
 #
 import datetime
 from flask import request
 from flask_login import login_required, current_user
@@ -29,7 +31,7 @@ from api.db.services.llm_service import TenantLLMService
 from api.db.services.user_service import UserTenantService
 from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
 from api.db.services.document_service import DocumentService
-from api.settings import RetCode, retrievaler
 from api.utils.api_utils import get_json_result
 import hashlib
 import re
@@ -61,7 +63,8 @@ def list_chunk():
         for id in sres.ids:
             d = {
                 "chunk_id": id,
-                "content_with_weight": rmSpace(sres.highlight[id]) if question and id in  sres.highlight else sres.field[id].get(
                     "content_with_weight", ""),
                 "doc_id": sres.field[id]["doc_id"],
                 "docnm_kwd": sres.field[id]["docnm_kwd"],
@@ -136,11 +139,11 @@ def set():
         tenant_id = DocumentService.get_tenant_id(req["doc_id"])
         if not tenant_id:
             return get_data_error_result(retmsg="Tenant not found!")
         embd_id = DocumentService.get_embd_id(req["doc_id"])
         embd_mdl = TenantLLMService.model_instance(
             tenant_id, LLMType.EMBEDDING.value, embd_id)
         e, doc = DocumentService.get_by_id(req["doc_id"])
         if not e:
             return get_data_error_result(retmsg="Document not found!")
@@ -185,7 +188,7 @@ def switch():
 @manager.route('/rm', methods=['POST'])
 @login_required
-@validate_request("chunk_ids","doc_id")
 def rm():
     req = request.json
     try:
@@ -230,11 +233,11 @@ def create():
         tenant_id = DocumentService.get_tenant_id(req["doc_id"])
         if not tenant_id:
             return get_data_error_result(retmsg="Tenant not found!")
         embd_id = DocumentService.get_embd_id(req["doc_id"])
         embd_mdl = TenantLLMService.model_instance(
             tenant_id, LLMType.EMBEDDING.value, embd_id)
         v, c = embd_mdl.encode([doc.name, req["content_with_weight"]])
         v = 0.1 * v[0] + 0.9 * v[1]
         d["q_%d_vec" % len(v)] = v.tolist()
@@ -277,9 +280,10 @@ def retrieval_test():
             chat_mdl = TenantLLMService.model_instance(kb.tenant_id, LLMType.CHAT)
             question += keyword_extraction(chat_mdl, question)
-        ranks = retrievaler.retrieval(question, embd_mdl, kb.tenant_id, [kb_id], page, size,
-                                      similarity_threshold, vector_similarity_weight, top,
-                                      doc_ids, rerank_mdl=rerank_mdl)
         for c in ranks["chunks"]:
             if "vector" in c:
                 del c["vector"]
@@ -290,3 +294,25 @@ def retrieval_test():
             return get_json_result(data=False, retmsg=f'No chunk found! Check the chunk status please!',
                                    retcode=RetCode.DATA_ERROR)
         return server_error_response(e)

 #  limitations under the License.
 #
 import datetime
+import json
+import traceback
 from flask import request
 from flask_login import login_required, current_user
 from api.db.services.user_service import UserTenantService
 from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
 from api.db.services.document_service import DocumentService
+from api.settings import RetCode, retrievaler, kg_retrievaler
 from api.utils.api_utils import get_json_result
 import hashlib
 import re
         for id in sres.ids:
             d = {
                 "chunk_id": id,
+                "content_with_weight": rmSpace(sres.highlight[id]) if question and id in sres.highlight else sres.field[
+                    id].get(
                     "content_with_weight", ""),
                 "doc_id": sres.field[id]["doc_id"],
                 "docnm_kwd": sres.field[id]["docnm_kwd"],
         tenant_id = DocumentService.get_tenant_id(req["doc_id"])
         if not tenant_id:
             return get_data_error_result(retmsg="Tenant not found!")
         embd_id = DocumentService.get_embd_id(req["doc_id"])
         embd_mdl = TenantLLMService.model_instance(
             tenant_id, LLMType.EMBEDDING.value, embd_id)
         e, doc = DocumentService.get_by_id(req["doc_id"])
         if not e:
             return get_data_error_result(retmsg="Document not found!")
 @manager.route('/rm', methods=['POST'])
 @login_required
+@validate_request("chunk_ids", "doc_id")
 def rm():
     req = request.json
     try:
         tenant_id = DocumentService.get_tenant_id(req["doc_id"])
         if not tenant_id:
             return get_data_error_result(retmsg="Tenant not found!")
         embd_id = DocumentService.get_embd_id(req["doc_id"])
         embd_mdl = TenantLLMService.model_instance(
             tenant_id, LLMType.EMBEDDING.value, embd_id)
         v, c = embd_mdl.encode([doc.name, req["content_with_weight"]])
         v = 0.1 * v[0] + 0.9 * v[1]
         d["q_%d_vec" % len(v)] = v.tolist()
             chat_mdl = TenantLLMService.model_instance(kb.tenant_id, LLMType.CHAT)
             question += keyword_extraction(chat_mdl, question)
+        retr = retrievaler if kb.parser_id != ParserType.KG else kg_retrievaler
+        ranks = retr.retrieval(question, embd_mdl, kb.tenant_id, [kb_id], page, size,
+                               similarity_threshold, vector_similarity_weight, top,
+                               doc_ids, rerank_mdl=rerank_mdl)
         for c in ranks["chunks"]:
             if "vector" in c:
                 del c["vector"]
             return get_json_result(data=False, retmsg=f'No chunk found! Check the chunk status please!',
                                    retcode=RetCode.DATA_ERROR)
         return server_error_response(e)
+@manager.route('/knowledge_graph', methods=['GET'])
+@login_required
+def knowledge_graph():
+    doc_id = request.args["doc_id"]
+    req = {
+        "doc_ids":[doc_id],
+        "knowledge_graph_kwd": ["graph", "mind_map"]
+    }
+    tenant_id = DocumentService.get_tenant_id(doc_id)
+    sres = retrievaler.search(req, search.index_name(tenant_id))
+    obj = {"graph": {}, "mind_map": {}}
+    for id in sres.ids[:2]:
+        ty = sres.field[id]["knowledge_graph_kwd"]
+        try:
+            obj[ty] = json.loads(sres.field[id]["content_with_weight"])
+        except Exception as e:
+            print(traceback.format_exc(), flush=True)
+    return get_json_result(data=obj)

api/apps/dataset_api.py CHANGED Viewed

@@ -623,7 +623,7 @@ def doc_parse_callback(doc_id, prog=None, msg=""):
     if cancel:
         raise Exception("The parsing process has been cancelled!")
 def doc_parse(binary, doc_name, parser_name, tenant_id, doc_id):
     match parser_name:
         case "book":
@@ -656,6 +656,7 @@ def doc_parse(binary, doc_name, parser_name, tenant_id, doc_id):
             return False
     return True
 @manager.route("/<dataset_id>/documents/<document_id>/status", methods=["POST"])

     if cancel:
         raise Exception("The parsing process has been cancelled!")
+"""
 def doc_parse(binary, doc_name, parser_name, tenant_id, doc_id):
     match parser_name:
         case "book":
             return False
     return True
+    """
 @manager.route("/<dataset_id>/documents/<document_id>/status", methods=["POST"])

api/db/__init__.py CHANGED Viewed

@@ -85,6 +85,7 @@ class ParserType(StrEnum):
     PICTURE = "picture"
     ONE = "one"
     AUDIO = "audio"
 class FileSource(StrEnum):

     PICTURE = "picture"
     ONE = "one"
     AUDIO = "audio"
+    KG = "knowledge_graph"
 class FileSource(StrEnum):

api/db/init_data.py CHANGED Viewed

@@ -122,7 +122,7 @@ def init_llm_factory():
     LLMService.filter_delete([LLMService.model.fid == "QAnything"])
     TenantLLMService.filter_update([TenantLLMService.model.llm_factory == "QAnything"], {"llm_factory": "Youdao"})
     TenantService.filter_update([1 == 1], {
-        "parser_ids": "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio"})
     ## insert openai two embedding models to the current openai user.
     print("Start to insert 2 OpenAI embedding models...")
     tenant_ids = set([row["tenant_id"] for row in TenantLLMService.get_openai_models()])
@@ -145,7 +145,7 @@ def init_llm_factory():
     """
     drop table llm;
     drop table llm_factories;
-    update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio';
     alter table knowledgebase modify avatar longtext;
     alter table user modify avatar longtext;
     alter table dialog modify icon longtext;
@@ -153,7 +153,7 @@ def init_llm_factory():
 def add_graph_templates():
-    dir = os.path.join(get_project_base_directory(), "graph", "templates")
     for fnm in os.listdir(dir):
         try:
             cnvs = json.load(open(os.path.join(dir, fnm), "r"))

     LLMService.filter_delete([LLMService.model.fid == "QAnything"])
     TenantLLMService.filter_update([TenantLLMService.model.llm_factory == "QAnything"], {"llm_factory": "Youdao"})
     TenantService.filter_update([1 == 1], {
+        "parser_ids": "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph"})
     ## insert openai two embedding models to the current openai user.
     print("Start to insert 2 OpenAI embedding models...")
     tenant_ids = set([row["tenant_id"] for row in TenantLLMService.get_openai_models()])
     """
     drop table llm;
     drop table llm_factories;
+    update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph';
     alter table knowledgebase modify avatar longtext;
     alter table user modify avatar longtext;
     alter table dialog modify icon longtext;
 def add_graph_templates():
+    dir = os.path.join(get_project_base_directory(), "agent", "templates")
     for fnm in os.listdir(dir):
         try:
             cnvs = json.load(open(os.path.join(dir, fnm), "r"))

api/db/services/dialog_service.py CHANGED Viewed

@@ -18,12 +18,12 @@ import json
 import re
 from copy import deepcopy
-from api.db import LLMType
 from api.db.db_models import Dialog, Conversation
 from api.db.services.common_service import CommonService
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import LLMService, TenantLLMService, LLMBundle
-from api.settings import chat_logger, retrievaler
 from rag.app.resume import forbidden_select_fields4resume
 from rag.nlp import keyword_extraction
 from rag.nlp.search import index_name
@@ -101,6 +101,9 @@ def chat(dialog, messages, stream=True, **kwargs):
         yield {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []}
         return {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []}
     questions = [m["content"] for m in messages if m["role"] == "user"]
     embd_mdl = LLMBundle(dialog.tenant_id, LLMType.EMBEDDING, embd_nms[0])
     if llm_id2llm_type(dialog.llm_id) == "image2text":
@@ -138,7 +141,7 @@ def chat(dialog, messages, stream=True, **kwargs):
     else:
         if prompt_config.get("keyword", False):
             questions[-1] += keyword_extraction(chat_mdl, questions[-1])
-        kbinfos = retrievaler.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,
                                         dialog.similarity_threshold,
                                         dialog.vector_similarity_weight,
                                         doc_ids=kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else None,
@@ -147,7 +150,7 @@ def chat(dialog, messages, stream=True, **kwargs):
     #self-rag
     if dialog.prompt_config.get("self_rag") and not relevant(dialog.tenant_id, dialog.llm_id, questions[-1], knowledges):
         questions[-1] = rewrite(dialog.tenant_id, dialog.llm_id, questions[-1])
-        kbinfos = retrievaler.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,
                                         dialog.similarity_threshold,
                                         dialog.vector_similarity_weight,
                                         doc_ids=kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else None,
@@ -179,7 +182,7 @@ def chat(dialog, messages, stream=True, **kwargs):
         nonlocal prompt_config, knowledges, kwargs, kbinfos
         refs = []
         if knowledges and (prompt_config.get("quote", True) and kwargs.get("quote", True)):
-            answer, idx = retrievaler.insert_citations(answer,
                                                        [ck["content_ltks"]
                                                         for ck in kbinfos["chunks"]],
                                                        [ck["vector"]

 import re
 from copy import deepcopy
+from api.db import LLMType, ParserType
 from api.db.db_models import Dialog, Conversation
 from api.db.services.common_service import CommonService
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import LLMService, TenantLLMService, LLMBundle
+from api.settings import chat_logger, retrievaler, kg_retrievaler
 from rag.app.resume import forbidden_select_fields4resume
 from rag.nlp import keyword_extraction
 from rag.nlp.search import index_name
         yield {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []}
         return {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []}
+    is_kg = all([kb.parser_id == ParserType.KG for kb in kbs])
+    retr = retrievaler if not is_kg else kg_retrievaler
     questions = [m["content"] for m in messages if m["role"] == "user"]
     embd_mdl = LLMBundle(dialog.tenant_id, LLMType.EMBEDDING, embd_nms[0])
     if llm_id2llm_type(dialog.llm_id) == "image2text":
     else:
         if prompt_config.get("keyword", False):
             questions[-1] += keyword_extraction(chat_mdl, questions[-1])
+        kbinfos = retr.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,
                                         dialog.similarity_threshold,
                                         dialog.vector_similarity_weight,
                                         doc_ids=kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else None,
     #self-rag
     if dialog.prompt_config.get("self_rag") and not relevant(dialog.tenant_id, dialog.llm_id, questions[-1], knowledges):
         questions[-1] = rewrite(dialog.tenant_id, dialog.llm_id, questions[-1])
+        kbinfos = retr.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,
                                         dialog.similarity_threshold,
                                         dialog.vector_similarity_weight,
                                         doc_ids=kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else None,
         nonlocal prompt_config, knowledges, kwargs, kbinfos
         refs = []
         if knowledges and (prompt_config.get("quote", True) and kwargs.get("quote", True)):
+            answer, idx = retr.insert_citations(answer,
                                                        [ck["content_ltks"]
                                                         for ck in kbinfos["chunks"]],
                                                        [ck["vector"]