Kevin Hu
committed on
Commit
·
6054f54
1
Parent(s):
8d4e686
Add graphrag (#1793)
Browse files
### What problem does this PR solve?
#1594
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
This view is limited to 50 files because it contains too many changes.
See raw diff
- {graph → agent}/README.md +0 -0
- {graph → agent}/README_zh.md +0 -0
- {graph → agent}/__init__.py +0 -0
- {graph → agent}/canvas.py +3 -3
- {graph → agent}/component/__init__.py +0 -0
- {graph → agent}/component/answer.py +1 -1
- {graph → agent}/component/arxiv.py +2 -4
- {graph → agent}/component/baidu.py +2 -2
- {graph → agent}/component/base.py +2 -2
- {graph → agent}/component/begin.py +2 -3
- {graph → agent}/component/bing.py +2 -2
- {graph → agent}/component/categorize.py +2 -5
- {graph → agent}/component/cite.py +1 -1
- {graph → agent}/component/duckduckgo.py +2 -4
- {graph → agent}/component/generate.py +1 -3
- {graph → agent}/component/google.py +2 -2
- {graph → agent}/component/googlescholar.py +2 -2
- {graph → agent}/component/keyword.py +2 -2
- {graph → agent}/component/message.py +1 -4
- {graph → agent}/component/pubmed.py +2 -4
- {graph → agent}/component/relevant.py +1 -1
- {graph → agent}/component/retrieval.py +1 -1
- {graph → agent}/component/rewrite.py +1 -1
- {graph → agent}/component/switch.py +1 -6
- {graph → agent}/component/wikipedia.py +2 -2
- {graph → agent}/settings.py +0 -0
- {graph → agent}/templates/HR_callout_zh.json +0 -0
- {graph → agent}/templates/customer_service.json +0 -0
- {graph → agent}/templates/general_chat_bot.json +0 -0
- {graph → agent}/templates/interpreter.json +0 -0
- {graph → agent}/templates/websearch_assistant.json +0 -0
- {graph → agent}/test/client.py +2 -3
- {graph → agent}/test/dsl_examples/categorize.json +0 -0
- {graph → agent}/test/dsl_examples/customer_service.json +0 -0
- {graph → agent}/test/dsl_examples/headhunter_zh.json +0 -0
- {graph → agent}/test/dsl_examples/intergreper.json +0 -0
- {graph → agent}/test/dsl_examples/interpreter.json +0 -0
- {graph → agent}/test/dsl_examples/keyword_wikipedia_and_generate.json +0 -0
- {graph → agent}/test/dsl_examples/retrieval_and_generate.json +0 -0
- {graph → agent}/test/dsl_examples/retrieval_categorize_and_generate.json +0 -0
- {graph → agent}/test/dsl_examples/retrieval_relevant_and_generate.json +0 -0
- {graph → agent}/test/dsl_examples/retrieval_relevant_keyword_baidu_and_generate.json +0 -0
- {graph → agent}/test/dsl_examples/retrieval_relevant_rewrite_and_generate.json +0 -0
- api/apps/api_app.py +1 -3
- api/apps/canvas_app.py +1 -4
- api/apps/chunk_app.py +36 -10
- api/apps/dataset_api.py +2 -1
- api/db/__init__.py +1 -0
- api/db/init_data.py +3 -3
- api/db/services/dialog_service.py +8 -5
{graph → agent}/README.md
RENAMED
File without changes
|
{graph → agent}/README_zh.md
RENAMED
File without changes
|
{graph → agent}/__init__.py
RENAMED
File without changes
|
{graph → agent}/canvas.py
RENAMED
@@ -22,9 +22,9 @@ from functools import partial
|
|
22 |
|
23 |
import pandas as pd
|
24 |
|
25 |
-
from
|
26 |
-
from
|
27 |
-
from
|
28 |
|
29 |
|
30 |
class Canvas(ABC):
|
|
|
22 |
|
23 |
import pandas as pd
|
24 |
|
25 |
+
from agent.component import component_class
|
26 |
+
from agent.component.base import ComponentBase
|
27 |
+
from agent.settings import flow_logger, DEBUG
|
28 |
|
29 |
|
30 |
class Canvas(ABC):
|
{graph → agent}/component/__init__.py
RENAMED
File without changes
|
{graph → agent}/component/answer.py
RENAMED
@@ -19,7 +19,7 @@ from functools import partial
|
|
19 |
|
20 |
import pandas as pd
|
21 |
|
22 |
-
from
|
23 |
|
24 |
|
25 |
class AnswerParam(ComponentParamBase):
|
|
|
19 |
|
20 |
import pandas as pd
|
21 |
|
22 |
+
from agent.component.base import ComponentBase, ComponentParamBase
|
23 |
|
24 |
|
25 |
class AnswerParam(ComponentParamBase):
|
{graph → agent}/component/arxiv.py
RENAMED
@@ -13,13 +13,11 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
-
import random
|
17 |
from abc import ABC
|
18 |
-
from functools import partial
|
19 |
import arxiv
|
20 |
import pandas as pd
|
21 |
-
from
|
22 |
-
from
|
23 |
|
24 |
|
25 |
class ArXivParam(ComponentParamBase):
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
|
|
17 |
import arxiv
|
18 |
import pandas as pd
|
19 |
+
from agent.settings import DEBUG
|
20 |
+
from agent.component.base import ComponentBase, ComponentParamBase
|
21 |
|
22 |
|
23 |
class ArXivParam(ComponentParamBase):
|
{graph → agent}/component/baidu.py
RENAMED
@@ -19,8 +19,8 @@ from functools import partial
|
|
19 |
import pandas as pd
|
20 |
import requests
|
21 |
import re
|
22 |
-
from
|
23 |
-
from
|
24 |
|
25 |
|
26 |
class BaiduParam(ComponentParamBase):
|
|
|
19 |
import pandas as pd
|
20 |
import requests
|
21 |
import re
|
22 |
+
from agent.settings import DEBUG
|
23 |
+
from agent.component.base import ComponentBase, ComponentParamBase
|
24 |
|
25 |
|
26 |
class BaiduParam(ComponentParamBase):
|
{graph → agent}/component/base.py
RENAMED
@@ -23,8 +23,8 @@ from typing import List, Dict, Tuple, Union
|
|
23 |
|
24 |
import pandas as pd
|
25 |
|
26 |
-
from
|
27 |
-
from
|
28 |
|
29 |
_FEEDED_DEPRECATED_PARAMS = "_feeded_deprecated_params"
|
30 |
_DEPRECATED_PARAMS = "_deprecated_params"
|
|
|
23 |
|
24 |
import pandas as pd
|
25 |
|
26 |
+
from agent import settings
|
27 |
+
from agent.settings import flow_logger, DEBUG
|
28 |
|
29 |
_FEEDED_DEPRECATED_PARAMS = "_feeded_deprecated_params"
|
30 |
_DEPRECATED_PARAMS = "_deprecated_params"
|
{graph → agent}/component/begin.py
RENAMED
@@ -13,11 +13,10 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
-
import json
|
17 |
from functools import partial
|
18 |
-
|
19 |
import pandas as pd
|
20 |
-
from
|
|
|
21 |
|
22 |
class BeginParam(ComponentParamBase):
|
23 |
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from functools import partial
|
|
|
17 |
import pandas as pd
|
18 |
+
from agent.component.base import ComponentBase, ComponentParamBase
|
19 |
+
|
20 |
|
21 |
class BeginParam(ComponentParamBase):
|
22 |
|
{graph → agent}/component/bing.py
RENAMED
@@ -16,8 +16,8 @@
|
|
16 |
from abc import ABC
|
17 |
import requests
|
18 |
import pandas as pd
|
19 |
-
from
|
20 |
-
from
|
21 |
|
22 |
|
23 |
class BingParam(ComponentParamBase):
|
|
|
16 |
from abc import ABC
|
17 |
import requests
|
18 |
import pandas as pd
|
19 |
+
from agent.settings import DEBUG
|
20 |
+
from agent.component.base import ComponentBase, ComponentParamBase
|
21 |
|
22 |
|
23 |
class BingParam(ComponentParamBase):
|
{graph → agent}/component/categorize.py
RENAMED
@@ -14,13 +14,10 @@
|
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
from abc import ABC
|
17 |
-
|
18 |
-
import pandas as pd
|
19 |
-
|
20 |
from api.db import LLMType
|
21 |
from api.db.services.llm_service import LLMBundle
|
22 |
-
from
|
23 |
-
from
|
24 |
|
25 |
|
26 |
class CategorizeParam(GenerateParam):
|
|
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
from abc import ABC
|
|
|
|
|
|
|
17 |
from api.db import LLMType
|
18 |
from api.db.services.llm_service import LLMBundle
|
19 |
+
from agent.component import GenerateParam, Generate
|
20 |
+
from agent.settings import DEBUG
|
21 |
|
22 |
|
23 |
class CategorizeParam(GenerateParam):
|
{graph → agent}/component/cite.py
RENAMED
@@ -21,7 +21,7 @@ from api.db import LLMType
|
|
21 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
22 |
from api.db.services.llm_service import LLMBundle
|
23 |
from api.settings import retrievaler
|
24 |
-
from
|
25 |
|
26 |
|
27 |
class CiteParam(ComponentParamBase):
|
|
|
21 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
22 |
from api.db.services.llm_service import LLMBundle
|
23 |
from api.settings import retrievaler
|
24 |
+
from agent.component.base import ComponentBase, ComponentParamBase
|
25 |
|
26 |
|
27 |
class CiteParam(ComponentParamBase):
|
{graph → agent}/component/duckduckgo.py
RENAMED
@@ -13,13 +13,11 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
-
import random
|
17 |
from abc import ABC
|
18 |
-
from functools import partial
|
19 |
from duckduckgo_search import DDGS
|
20 |
import pandas as pd
|
21 |
-
from
|
22 |
-
from
|
23 |
|
24 |
|
25 |
class DuckDuckGoParam(ComponentParamBase):
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
|
|
17 |
from duckduckgo_search import DDGS
|
18 |
import pandas as pd
|
19 |
+
from agent.settings import DEBUG
|
20 |
+
from agent.component.base import ComponentBase, ComponentParamBase
|
21 |
|
22 |
|
23 |
class DuckDuckGoParam(ComponentParamBase):
|
{graph → agent}/component/generate.py
RENAMED
@@ -15,13 +15,11 @@
|
|
15 |
#
|
16 |
import re
|
17 |
from functools import partial
|
18 |
-
|
19 |
import pandas as pd
|
20 |
-
|
21 |
from api.db import LLMType
|
22 |
from api.db.services.llm_service import LLMBundle
|
23 |
from api.settings import retrievaler
|
24 |
-
from
|
25 |
|
26 |
|
27 |
class GenerateParam(ComponentParamBase):
|
|
|
15 |
#
|
16 |
import re
|
17 |
from functools import partial
|
|
|
18 |
import pandas as pd
|
|
|
19 |
from api.db import LLMType
|
20 |
from api.db.services.llm_service import LLMBundle
|
21 |
from api.settings import retrievaler
|
22 |
+
from agent.component.base import ComponentBase, ComponentParamBase
|
23 |
|
24 |
|
25 |
class GenerateParam(ComponentParamBase):
|
{graph → agent}/component/google.py
RENAMED
@@ -16,8 +16,8 @@
|
|
16 |
from abc import ABC
|
17 |
from serpapi import GoogleSearch
|
18 |
import pandas as pd
|
19 |
-
from
|
20 |
-
from
|
21 |
|
22 |
|
23 |
class GoogleParam(ComponentParamBase):
|
|
|
16 |
from abc import ABC
|
17 |
from serpapi import GoogleSearch
|
18 |
import pandas as pd
|
19 |
+
from agent.settings import DEBUG
|
20 |
+
from agent.component.base import ComponentBase, ComponentParamBase
|
21 |
|
22 |
|
23 |
class GoogleParam(ComponentParamBase):
|
{graph → agent}/component/googlescholar.py
RENAMED
@@ -15,8 +15,8 @@
|
|
15 |
#
|
16 |
from abc import ABC
|
17 |
import pandas as pd
|
18 |
-
from
|
19 |
-
from
|
20 |
from scholarly import scholarly
|
21 |
|
22 |
|
|
|
15 |
#
|
16 |
from abc import ABC
|
17 |
import pandas as pd
|
18 |
+
from agent.settings import DEBUG
|
19 |
+
from agent.component.base import ComponentBase, ComponentParamBase
|
20 |
from scholarly import scholarly
|
21 |
|
22 |
|
{graph → agent}/component/keyword.py
RENAMED
@@ -17,8 +17,8 @@ import re
|
|
17 |
from abc import ABC
|
18 |
from api.db import LLMType
|
19 |
from api.db.services.llm_service import LLMBundle
|
20 |
-
from
|
21 |
-
from
|
22 |
|
23 |
|
24 |
class KeywordExtractParam(GenerateParam):
|
|
|
17 |
from abc import ABC
|
18 |
from api.db import LLMType
|
19 |
from api.db.services.llm_service import LLMBundle
|
20 |
+
from agent.component import GenerateParam, Generate
|
21 |
+
from agent.settings import DEBUG
|
22 |
|
23 |
|
24 |
class KeywordExtractParam(GenerateParam):
|
{graph → agent}/component/message.py
RENAMED
@@ -16,10 +16,7 @@
|
|
16 |
import random
|
17 |
from abc import ABC
|
18 |
from functools import partial
|
19 |
-
|
20 |
-
import pandas as pd
|
21 |
-
|
22 |
-
from graph.component.base import ComponentBase, ComponentParamBase
|
23 |
|
24 |
|
25 |
class MessageParam(ComponentParamBase):
|
|
|
16 |
import random
|
17 |
from abc import ABC
|
18 |
from functools import partial
|
19 |
+
from agent.component.base import ComponentBase, ComponentParamBase
|
|
|
|
|
|
|
20 |
|
21 |
|
22 |
class MessageParam(ComponentParamBase):
|
{graph → agent}/component/pubmed.py
RENAMED
@@ -13,14 +13,12 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
-
import random
|
17 |
from abc import ABC
|
18 |
-
from functools import partial
|
19 |
from Bio import Entrez
|
20 |
import pandas as pd
|
21 |
import xml.etree.ElementTree as ET
|
22 |
-
from
|
23 |
-
from
|
24 |
|
25 |
|
26 |
class PubMedParam(ComponentParamBase):
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
|
|
17 |
from Bio import Entrez
|
18 |
import pandas as pd
|
19 |
import xml.etree.ElementTree as ET
|
20 |
+
from agent.settings import DEBUG
|
21 |
+
from agent.component.base import ComponentBase, ComponentParamBase
|
22 |
|
23 |
|
24 |
class PubMedParam(ComponentParamBase):
|
{graph → agent}/component/relevant.py
RENAMED
@@ -16,7 +16,7 @@
|
|
16 |
from abc import ABC
|
17 |
from api.db import LLMType
|
18 |
from api.db.services.llm_service import LLMBundle
|
19 |
-
from
|
20 |
from rag.utils import num_tokens_from_string, encoder
|
21 |
|
22 |
|
|
|
16 |
from abc import ABC
|
17 |
from api.db import LLMType
|
18 |
from api.db.services.llm_service import LLMBundle
|
19 |
+
from agent.component import GenerateParam, Generate
|
20 |
from rag.utils import num_tokens_from_string, encoder
|
21 |
|
22 |
|
{graph → agent}/component/retrieval.py
RENAMED
@@ -21,7 +21,7 @@ from api.db import LLMType
|
|
21 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
22 |
from api.db.services.llm_service import LLMBundle
|
23 |
from api.settings import retrievaler
|
24 |
-
from
|
25 |
|
26 |
|
27 |
class RetrievalParam(ComponentParamBase):
|
|
|
21 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
22 |
from api.db.services.llm_service import LLMBundle
|
23 |
from api.settings import retrievaler
|
24 |
+
from agent.component.base import ComponentBase, ComponentParamBase
|
25 |
|
26 |
|
27 |
class RetrievalParam(ComponentParamBase):
|
{graph → agent}/component/rewrite.py
RENAMED
@@ -16,7 +16,7 @@
|
|
16 |
from abc import ABC
|
17 |
from api.db import LLMType
|
18 |
from api.db.services.llm_service import LLMBundle
|
19 |
-
from
|
20 |
|
21 |
|
22 |
class RewriteQuestionParam(GenerateParam):
|
|
|
16 |
from abc import ABC
|
17 |
from api.db import LLMType
|
18 |
from api.db.services.llm_service import LLMBundle
|
19 |
+
from agent.component import GenerateParam, Generate
|
20 |
|
21 |
|
22 |
class RewriteQuestionParam(GenerateParam):
|
{graph → agent}/component/switch.py
RENAMED
@@ -16,12 +16,7 @@
|
|
16 |
from abc import ABC
|
17 |
|
18 |
import pandas as pd
|
19 |
-
|
20 |
-
from api.db import LLMType
|
21 |
-
from api.db.services.knowledgebase_service import KnowledgebaseService
|
22 |
-
from api.db.services.llm_service import LLMBundle
|
23 |
-
from api.settings import retrievaler
|
24 |
-
from graph.component.base import ComponentBase, ComponentParamBase
|
25 |
|
26 |
|
27 |
class SwitchParam(ComponentParamBase):
|
|
|
16 |
from abc import ABC
|
17 |
|
18 |
import pandas as pd
|
19 |
+
from agent.component.base import ComponentBase, ComponentParamBase
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
|
22 |
class SwitchParam(ComponentParamBase):
|
{graph → agent}/component/wikipedia.py
RENAMED
@@ -18,8 +18,8 @@ from abc import ABC
|
|
18 |
from functools import partial
|
19 |
import wikipedia
|
20 |
import pandas as pd
|
21 |
-
from
|
22 |
-
from
|
23 |
|
24 |
|
25 |
class WikipediaParam(ComponentParamBase):
|
|
|
18 |
from functools import partial
|
19 |
import wikipedia
|
20 |
import pandas as pd
|
21 |
+
from agent.settings import DEBUG
|
22 |
+
from agent.component.base import ComponentBase, ComponentParamBase
|
23 |
|
24 |
|
25 |
class WikipediaParam(ComponentParamBase):
|
{graph → agent}/settings.py
RENAMED
File without changes
|
{graph → agent}/templates/HR_callout_zh.json
RENAMED
File without changes
|
{graph → agent}/templates/customer_service.json
RENAMED
File without changes
|
{graph → agent}/templates/general_chat_bot.json
RENAMED
File without changes
|
{graph → agent}/templates/interpreter.json
RENAMED
File without changes
|
{graph → agent}/templates/websearch_assistant.json
RENAMED
File without changes
|
{graph → agent}/test/client.py
RENAMED
@@ -16,9 +16,8 @@
|
|
16 |
import argparse
|
17 |
import os
|
18 |
from functools import partial
|
19 |
-
import
|
20 |
-
from
|
21 |
-
from graph.settings import DEBUG
|
22 |
|
23 |
if __name__ == '__main__':
|
24 |
parser = argparse.ArgumentParser()
|
|
|
16 |
import argparse
|
17 |
import os
|
18 |
from functools import partial
|
19 |
+
from agent.canvas import Canvas
|
20 |
+
from agent.settings import DEBUG
|
|
|
21 |
|
22 |
if __name__ == '__main__':
|
23 |
parser = argparse.ArgumentParser()
|
{graph → agent}/test/dsl_examples/categorize.json
RENAMED
File without changes
|
{graph → agent}/test/dsl_examples/customer_service.json
RENAMED
File without changes
|
{graph → agent}/test/dsl_examples/headhunter_zh.json
RENAMED
File without changes
|
{graph → agent}/test/dsl_examples/intergreper.json
RENAMED
File without changes
|
{graph → agent}/test/dsl_examples/interpreter.json
RENAMED
File without changes
|
{graph → agent}/test/dsl_examples/keyword_wikipedia_and_generate.json
RENAMED
File without changes
|
{graph → agent}/test/dsl_examples/retrieval_and_generate.json
RENAMED
File without changes
|
{graph → agent}/test/dsl_examples/retrieval_categorize_and_generate.json
RENAMED
File without changes
|
{graph → agent}/test/dsl_examples/retrieval_relevant_and_generate.json
RENAMED
File without changes
|
{graph → agent}/test/dsl_examples/retrieval_relevant_keyword_baidu_and_generate.json
RENAMED
File without changes
|
{graph → agent}/test/dsl_examples/retrieval_relevant_rewrite_and_generate.json
RENAMED
File without changes
|
api/apps/api_app.py
CHANGED
@@ -20,7 +20,7 @@ from datetime import datetime, timedelta
|
|
20 |
from flask import request, Response
|
21 |
from flask_login import login_required, current_user
|
22 |
|
23 |
-
from api.db import FileType, ParserType, FileSource
|
24 |
from api.db.db_models import APIToken, API4Conversation, Task, File
|
25 |
from api.db.services import duplicate_name
|
26 |
from api.db.services.api_service import APITokenService, API4ConversationService
|
@@ -29,7 +29,6 @@ from api.db.services.document_service import DocumentService
|
|
29 |
from api.db.services.file2document_service import File2DocumentService
|
30 |
from api.db.services.file_service import FileService
|
31 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
32 |
-
from api.db.services.llm_service import TenantLLMService
|
33 |
from api.db.services.task_service import queue_tasks, TaskService
|
34 |
from api.db.services.user_service import UserTenantService
|
35 |
from api.settings import RetCode, retrievaler
|
@@ -38,7 +37,6 @@ from api.utils.api_utils import server_error_response, get_data_error_result, ge
|
|
38 |
from itsdangerous import URLSafeTimedSerializer
|
39 |
|
40 |
from api.utils.file_utils import filename_type, thumbnail
|
41 |
-
from rag.nlp import keyword_extraction
|
42 |
from rag.utils.minio_conn import MINIO
|
43 |
|
44 |
|
|
|
20 |
from flask import request, Response
|
21 |
from flask_login import login_required, current_user
|
22 |
|
23 |
+
from api.db import FileType, ParserType, FileSource
|
24 |
from api.db.db_models import APIToken, API4Conversation, Task, File
|
25 |
from api.db.services import duplicate_name
|
26 |
from api.db.services.api_service import APITokenService, API4ConversationService
|
|
|
29 |
from api.db.services.file2document_service import File2DocumentService
|
30 |
from api.db.services.file_service import FileService
|
31 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
|
|
32 |
from api.db.services.task_service import queue_tasks, TaskService
|
33 |
from api.db.services.user_service import UserTenantService
|
34 |
from api.settings import RetCode, retrievaler
|
|
|
37 |
from itsdangerous import URLSafeTimedSerializer
|
38 |
|
39 |
from api.utils.file_utils import filename_type, thumbnail
|
|
|
40 |
from rag.utils.minio_conn import MINIO
|
41 |
|
42 |
|
api/apps/canvas_app.py
CHANGED
@@ -15,15 +15,12 @@
|
|
15 |
#
|
16 |
import json
|
17 |
from functools import partial
|
18 |
-
|
19 |
from flask import request, Response
|
20 |
from flask_login import login_required, current_user
|
21 |
-
|
22 |
-
from api.db.db_models import UserCanvas
|
23 |
from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService
|
24 |
from api.utils import get_uuid
|
25 |
from api.utils.api_utils import get_json_result, server_error_response, validate_request
|
26 |
-
from
|
27 |
|
28 |
|
29 |
@manager.route('/templates', methods=['GET'])
|
|
|
15 |
#
|
16 |
import json
|
17 |
from functools import partial
|
|
|
18 |
from flask import request, Response
|
19 |
from flask_login import login_required, current_user
|
|
|
|
|
20 |
from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService
|
21 |
from api.utils import get_uuid
|
22 |
from api.utils.api_utils import get_json_result, server_error_response, validate_request
|
23 |
+
from agent.canvas import Canvas
|
24 |
|
25 |
|
26 |
@manager.route('/templates', methods=['GET'])
|
api/apps/chunk_app.py
CHANGED
@@ -14,6 +14,8 @@
|
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
import datetime
|
|
|
|
|
17 |
|
18 |
from flask import request
|
19 |
from flask_login import login_required, current_user
|
@@ -29,7 +31,7 @@ from api.db.services.llm_service import TenantLLMService
|
|
29 |
from api.db.services.user_service import UserTenantService
|
30 |
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
|
31 |
from api.db.services.document_service import DocumentService
|
32 |
-
from api.settings import RetCode, retrievaler
|
33 |
from api.utils.api_utils import get_json_result
|
34 |
import hashlib
|
35 |
import re
|
@@ -61,7 +63,8 @@ def list_chunk():
|
|
61 |
for id in sres.ids:
|
62 |
d = {
|
63 |
"chunk_id": id,
|
64 |
-
"content_with_weight": rmSpace(sres.highlight[id]) if question and id in
|
|
|
65 |
"content_with_weight", ""),
|
66 |
"doc_id": sres.field[id]["doc_id"],
|
67 |
"docnm_kwd": sres.field[id]["docnm_kwd"],
|
@@ -136,11 +139,11 @@ def set():
|
|
136 |
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
|
137 |
if not tenant_id:
|
138 |
return get_data_error_result(retmsg="Tenant not found!")
|
139 |
-
|
140 |
embd_id = DocumentService.get_embd_id(req["doc_id"])
|
141 |
embd_mdl = TenantLLMService.model_instance(
|
142 |
tenant_id, LLMType.EMBEDDING.value, embd_id)
|
143 |
-
|
144 |
e, doc = DocumentService.get_by_id(req["doc_id"])
|
145 |
if not e:
|
146 |
return get_data_error_result(retmsg="Document not found!")
|
@@ -185,7 +188,7 @@ def switch():
|
|
185 |
|
186 |
@manager.route('/rm', methods=['POST'])
|
187 |
@login_required
|
188 |
-
@validate_request("chunk_ids","doc_id")
|
189 |
def rm():
|
190 |
req = request.json
|
191 |
try:
|
@@ -230,11 +233,11 @@ def create():
|
|
230 |
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
|
231 |
if not tenant_id:
|
232 |
return get_data_error_result(retmsg="Tenant not found!")
|
233 |
-
|
234 |
embd_id = DocumentService.get_embd_id(req["doc_id"])
|
235 |
embd_mdl = TenantLLMService.model_instance(
|
236 |
tenant_id, LLMType.EMBEDDING.value, embd_id)
|
237 |
-
|
238 |
v, c = embd_mdl.encode([doc.name, req["content_with_weight"]])
|
239 |
v = 0.1 * v[0] + 0.9 * v[1]
|
240 |
d["q_%d_vec" % len(v)] = v.tolist()
|
@@ -277,9 +280,10 @@ def retrieval_test():
|
|
277 |
chat_mdl = TenantLLMService.model_instance(kb.tenant_id, LLMType.CHAT)
|
278 |
question += keyword_extraction(chat_mdl, question)
|
279 |
|
280 |
-
|
281 |
-
|
282 |
-
|
|
|
283 |
for c in ranks["chunks"]:
|
284 |
if "vector" in c:
|
285 |
del c["vector"]
|
@@ -290,3 +294,25 @@ def retrieval_test():
|
|
290 |
return get_json_result(data=False, retmsg=f'No chunk found! Check the chunk status please!',
|
291 |
retcode=RetCode.DATA_ERROR)
|
292 |
return server_error_response(e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
import datetime
|
17 |
+
import json
|
18 |
+
import traceback
|
19 |
|
20 |
from flask import request
|
21 |
from flask_login import login_required, current_user
|
|
|
31 |
from api.db.services.user_service import UserTenantService
|
32 |
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
|
33 |
from api.db.services.document_service import DocumentService
|
34 |
+
from api.settings import RetCode, retrievaler, kg_retrievaler
|
35 |
from api.utils.api_utils import get_json_result
|
36 |
import hashlib
|
37 |
import re
|
|
|
63 |
for id in sres.ids:
|
64 |
d = {
|
65 |
"chunk_id": id,
|
66 |
+
"content_with_weight": rmSpace(sres.highlight[id]) if question and id in sres.highlight else sres.field[
|
67 |
+
id].get(
|
68 |
"content_with_weight", ""),
|
69 |
"doc_id": sres.field[id]["doc_id"],
|
70 |
"docnm_kwd": sres.field[id]["docnm_kwd"],
|
|
|
139 |
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
|
140 |
if not tenant_id:
|
141 |
return get_data_error_result(retmsg="Tenant not found!")
|
142 |
+
|
143 |
embd_id = DocumentService.get_embd_id(req["doc_id"])
|
144 |
embd_mdl = TenantLLMService.model_instance(
|
145 |
tenant_id, LLMType.EMBEDDING.value, embd_id)
|
146 |
+
|
147 |
e, doc = DocumentService.get_by_id(req["doc_id"])
|
148 |
if not e:
|
149 |
return get_data_error_result(retmsg="Document not found!")
|
|
|
188 |
|
189 |
@manager.route('/rm', methods=['POST'])
|
190 |
@login_required
|
191 |
+
@validate_request("chunk_ids", "doc_id")
|
192 |
def rm():
|
193 |
req = request.json
|
194 |
try:
|
|
|
233 |
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
|
234 |
if not tenant_id:
|
235 |
return get_data_error_result(retmsg="Tenant not found!")
|
236 |
+
|
237 |
embd_id = DocumentService.get_embd_id(req["doc_id"])
|
238 |
embd_mdl = TenantLLMService.model_instance(
|
239 |
tenant_id, LLMType.EMBEDDING.value, embd_id)
|
240 |
+
|
241 |
v, c = embd_mdl.encode([doc.name, req["content_with_weight"]])
|
242 |
v = 0.1 * v[0] + 0.9 * v[1]
|
243 |
d["q_%d_vec" % len(v)] = v.tolist()
|
|
|
280 |
chat_mdl = TenantLLMService.model_instance(kb.tenant_id, LLMType.CHAT)
|
281 |
question += keyword_extraction(chat_mdl, question)
|
282 |
|
283 |
+
retr = retrievaler if kb.parser_id != ParserType.KG else kg_retrievaler
|
284 |
+
ranks = retr.retrieval(question, embd_mdl, kb.tenant_id, [kb_id], page, size,
|
285 |
+
similarity_threshold, vector_similarity_weight, top,
|
286 |
+
doc_ids, rerank_mdl=rerank_mdl)
|
287 |
for c in ranks["chunks"]:
|
288 |
if "vector" in c:
|
289 |
del c["vector"]
|
|
|
294 |
return get_json_result(data=False, retmsg=f'No chunk found! Check the chunk status please!',
|
295 |
retcode=RetCode.DATA_ERROR)
|
296 |
return server_error_response(e)
|
297 |
+
|
298 |
+
|
299 |
+
@manager.route('/knowledge_graph', methods=['GET'])
|
300 |
+
@login_required
|
301 |
+
def knowledge_graph():
|
302 |
+
doc_id = request.args["doc_id"]
|
303 |
+
req = {
|
304 |
+
"doc_ids":[doc_id],
|
305 |
+
"knowledge_graph_kwd": ["graph", "mind_map"]
|
306 |
+
}
|
307 |
+
tenant_id = DocumentService.get_tenant_id(doc_id)
|
308 |
+
sres = retrievaler.search(req, search.index_name(tenant_id))
|
309 |
+
obj = {"graph": {}, "mind_map": {}}
|
310 |
+
for id in sres.ids[:2]:
|
311 |
+
ty = sres.field[id]["knowledge_graph_kwd"]
|
312 |
+
try:
|
313 |
+
obj[ty] = json.loads(sres.field[id]["content_with_weight"])
|
314 |
+
except Exception as e:
|
315 |
+
print(traceback.format_exc(), flush=True)
|
316 |
+
|
317 |
+
return get_json_result(data=obj)
|
318 |
+
|
api/apps/dataset_api.py
CHANGED
@@ -623,7 +623,7 @@ def doc_parse_callback(doc_id, prog=None, msg=""):
|
|
623 |
if cancel:
|
624 |
raise Exception("The parsing process has been cancelled!")
|
625 |
|
626 |
-
|
627 |
def doc_parse(binary, doc_name, parser_name, tenant_id, doc_id):
|
628 |
match parser_name:
|
629 |
case "book":
|
@@ -656,6 +656,7 @@ def doc_parse(binary, doc_name, parser_name, tenant_id, doc_id):
|
|
656 |
return False
|
657 |
|
658 |
return True
|
|
|
659 |
|
660 |
|
661 |
@manager.route("/<dataset_id>/documents/<document_id>/status", methods=["POST"])
|
|
|
623 |
if cancel:
|
624 |
raise Exception("The parsing process has been cancelled!")
|
625 |
|
626 |
+
"""
|
627 |
def doc_parse(binary, doc_name, parser_name, tenant_id, doc_id):
|
628 |
match parser_name:
|
629 |
case "book":
|
|
|
656 |
return False
|
657 |
|
658 |
return True
|
659 |
+
"""
|
660 |
|
661 |
|
662 |
@manager.route("/<dataset_id>/documents/<document_id>/status", methods=["POST"])
|
api/db/__init__.py
CHANGED
@@ -85,6 +85,7 @@ class ParserType(StrEnum):
|
|
85 |
PICTURE = "picture"
|
86 |
ONE = "one"
|
87 |
AUDIO = "audio"
|
|
|
88 |
|
89 |
|
90 |
class FileSource(StrEnum):
|
|
|
85 |
PICTURE = "picture"
|
86 |
ONE = "one"
|
87 |
AUDIO = "audio"
|
88 |
+
KG = "knowledge_graph"
|
89 |
|
90 |
|
91 |
class FileSource(StrEnum):
|
api/db/init_data.py
CHANGED
@@ -122,7 +122,7 @@ def init_llm_factory():
|
|
122 |
LLMService.filter_delete([LLMService.model.fid == "QAnything"])
|
123 |
TenantLLMService.filter_update([TenantLLMService.model.llm_factory == "QAnything"], {"llm_factory": "Youdao"})
|
124 |
TenantService.filter_update([1 == 1], {
|
125 |
-
"parser_ids": "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio"})
|
126 |
## insert openai two embedding models to the current openai user.
|
127 |
print("Start to insert 2 OpenAI embedding models...")
|
128 |
tenant_ids = set([row["tenant_id"] for row in TenantLLMService.get_openai_models()])
|
@@ -145,7 +145,7 @@ def init_llm_factory():
|
|
145 |
"""
|
146 |
drop table llm;
|
147 |
drop table llm_factories;
|
148 |
-
update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio';
|
149 |
alter table knowledgebase modify avatar longtext;
|
150 |
alter table user modify avatar longtext;
|
151 |
alter table dialog modify icon longtext;
|
@@ -153,7 +153,7 @@ def init_llm_factory():
|
|
153 |
|
154 |
|
155 |
def add_graph_templates():
|
156 |
-
dir = os.path.join(get_project_base_directory(), "
|
157 |
for fnm in os.listdir(dir):
|
158 |
try:
|
159 |
cnvs = json.load(open(os.path.join(dir, fnm), "r"))
|
|
|
122 |
LLMService.filter_delete([LLMService.model.fid == "QAnything"])
|
123 |
TenantLLMService.filter_update([TenantLLMService.model.llm_factory == "QAnything"], {"llm_factory": "Youdao"})
|
124 |
TenantService.filter_update([1 == 1], {
|
125 |
+
"parser_ids": "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph"})
|
126 |
## insert openai two embedding models to the current openai user.
|
127 |
print("Start to insert 2 OpenAI embedding models...")
|
128 |
tenant_ids = set([row["tenant_id"] for row in TenantLLMService.get_openai_models()])
|
|
|
145 |
"""
|
146 |
drop table llm;
|
147 |
drop table llm_factories;
|
148 |
+
update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph';
|
149 |
alter table knowledgebase modify avatar longtext;
|
150 |
alter table user modify avatar longtext;
|
151 |
alter table dialog modify icon longtext;
|
|
|
153 |
|
154 |
|
155 |
def add_graph_templates():
|
156 |
+
dir = os.path.join(get_project_base_directory(), "agent", "templates")
|
157 |
for fnm in os.listdir(dir):
|
158 |
try:
|
159 |
cnvs = json.load(open(os.path.join(dir, fnm), "r"))
|
api/db/services/dialog_service.py
CHANGED
@@ -18,12 +18,12 @@ import json
|
|
18 |
import re
|
19 |
from copy import deepcopy
|
20 |
|
21 |
-
from api.db import LLMType
|
22 |
from api.db.db_models import Dialog, Conversation
|
23 |
from api.db.services.common_service import CommonService
|
24 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
25 |
from api.db.services.llm_service import LLMService, TenantLLMService, LLMBundle
|
26 |
-
from api.settings import chat_logger, retrievaler
|
27 |
from rag.app.resume import forbidden_select_fields4resume
|
28 |
from rag.nlp import keyword_extraction
|
29 |
from rag.nlp.search import index_name
|
@@ -101,6 +101,9 @@ def chat(dialog, messages, stream=True, **kwargs):
|
|
101 |
yield {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []}
|
102 |
return {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []}
|
103 |
|
|
|
|
|
|
|
104 |
questions = [m["content"] for m in messages if m["role"] == "user"]
|
105 |
embd_mdl = LLMBundle(dialog.tenant_id, LLMType.EMBEDDING, embd_nms[0])
|
106 |
if llm_id2llm_type(dialog.llm_id) == "image2text":
|
@@ -138,7 +141,7 @@ def chat(dialog, messages, stream=True, **kwargs):
|
|
138 |
else:
|
139 |
if prompt_config.get("keyword", False):
|
140 |
questions[-1] += keyword_extraction(chat_mdl, questions[-1])
|
141 |
-
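kbinfos = retrievaler.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,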
|
142 |
dialog.similarity_threshold,
|
143 |
dialog.vector_similarity_weight,
|
144 |
doc_ids=kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else None,
|
@@ -147,7 +150,7 @@ def chat(dialog, messages, stream=True, **kwargs):
|
|
147 |
#self-rag
|
148 |
if dialog.prompt_config.get("self_rag") and not relevant(dialog.tenant_id, dialog.llm_id, questions[-1], knowledges):
|
149 |
questions[-1] = rewrite(dialog.tenant_id, dialog.llm_id, questions[-1])
|
150 |
-
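kbinfos = retrievaler.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,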
|
151 |
dialog.similarity_threshold,
|
152 |
dialog.vector_similarity_weight,
|
153 |
doc_ids=kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else None,
|
@@ -179,7 +182,7 @@ def chat(dialog, messages, stream=True, **kwargs):
|
|
179 |
nonlocal prompt_config, knowledges, kwargs, kbinfos
|
180 |
refs = []
|
181 |
if knowledges and (prompt_config.get("quote", True) and kwargs.get("quote", True)):
|
182 |
-
answer, idx = retrievaler.insert_citations(answer,
|
183 |
[ck["content_ltks"]
|
184 |
for ck in kbinfos["chunks"]],
|
185 |
[ck["vector"]
|
|
|
18 |
import re
|
19 |
from copy import deepcopy
|
20 |
|
21 |
+
from api.db import LLMType, ParserType
|
22 |
from api.db.db_models import Dialog, Conversation
|
23 |
from api.db.services.common_service import CommonService
|
24 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
25 |
from api.db.services.llm_service import LLMService, TenantLLMService, LLMBundle
|
26 |
+
from api.settings import chat_logger, retrievaler, kg_retrievaler
|
27 |
from rag.app.resume import forbidden_select_fields4resume
|
28 |
from rag.nlp import keyword_extraction
|
29 |
from rag.nlp.search import index_name
|
|
|
101 |
yield {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []}
|
102 |
return {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []}
|
103 |
|
104 |
+
is_kg = all([kb.parser_id == ParserType.KG for kb in kbs])
|
105 |
+
retr = retrievaler if not is_kg else kg_retrievaler
|
106 |
+
|
107 |
questions = [m["content"] for m in messages if m["role"] == "user"]
|
108 |
embd_mdl = LLMBundle(dialog.tenant_id, LLMType.EMBEDDING, embd_nms[0])
|
109 |
if llm_id2llm_type(dialog.llm_id) == "image2text":
|
|
|
141 |
else:
|
142 |
if prompt_config.get("keyword", False):
|
143 |
questions[-1] += keyword_extraction(chat_mdl, questions[-1])
|
144 |
+
kbinfos = retr.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,
|
145 |
dialog.similarity_threshold,
|
146 |
dialog.vector_similarity_weight,
|
147 |
doc_ids=kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else None,
|
|
|
150 |
#self-rag
|
151 |
if dialog.prompt_config.get("self_rag") and not relevant(dialog.tenant_id, dialog.llm_id, questions[-1], knowledges):
|
152 |
questions[-1] = rewrite(dialog.tenant_id, dialog.llm_id, questions[-1])
|
153 |
+
kbinfos = retr.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,
|
154 |
dialog.similarity_threshold,
|
155 |
dialog.vector_similarity_weight,
|
156 |
doc_ids=kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else None,
|
|
|
182 |
nonlocal prompt_config, knowledges, kwargs, kbinfos
|
183 |
refs = []
|
184 |
if knowledges and (prompt_config.get("quote", True) and kwargs.get("quote", True)):
|
185 |
+
answer, idx = retr.insert_citations(answer,
|
186 |
[ck["content_ltks"]
|
187 |
for ck in kbinfos["chunks"]],
|
188 |
[ck["vector"]
|