yqkcn
committed on
Commit
·
35bb186
1
Parent(s):
2570acc
style: fix typo and format code (#2618)
Browse files
### What problem does this PR solve?
- Fix typo
- Remove unused import
- Format code
### Type of change
- [x] Other (please describe): typo and format
- api/db/services/llm_service.py +2 -3
- graphrag/index.py +2 -3
- rag/app/knowledge_graph.py +4 -4
- rag/llm/chat_model.py +1 -1
- rag/utils/__init__.py +3 -5
api/db/services/llm_service.py
CHANGED
@@ -169,8 +169,8 @@ class TenantLLMService(CommonService):
|
|
169 |
|
170 |
num = 0
|
171 |
try:
|
172 |
-
for u in cls.query(tenant_id
|
173 |
-
num += cls.model.update(used_tokens
|
174 |
.where(cls.model.tenant_id == tenant_id, cls.model.llm_name == mdlnm)\
|
175 |
.execute()
|
176 |
except Exception as e:
|
@@ -252,7 +252,6 @@ class LLMBundle(object):
|
|
252 |
return
|
253 |
yield chunk
|
254 |
|
255 |
-
|
256 |
def chat(self, system, history, gen_conf):
|
257 |
txt, used_tokens = self.mdl.chat(system, history, gen_conf)
|
258 |
if not TenantLLMService.increase_usage(
|
|
|
169 |
|
170 |
num = 0
|
171 |
try:
|
172 |
+
for u in cls.query(tenant_id=tenant_id, llm_name=mdlnm):
|
173 |
+
num += cls.model.update(used_tokens=u.used_tokens + used_tokens)\
|
174 |
.where(cls.model.tenant_id == tenant_id, cls.model.llm_name == mdlnm)\
|
175 |
.execute()
|
176 |
except Exception as e:
|
|
|
252 |
return
|
253 |
yield chunk
|
254 |
|
|
|
255 |
def chat(self, system, history, gen_conf):
|
256 |
txt, used_tokens = self.mdl.chat(system, history, gen_conf)
|
257 |
if not TenantLLMService.increase_usage(
|
graphrag/index.py
CHANGED
@@ -13,7 +13,6 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
-
import re
|
17 |
from concurrent.futures import ThreadPoolExecutor
|
18 |
import json
|
19 |
from functools import reduce
|
@@ -24,7 +23,7 @@ from api.db.services.llm_service import LLMBundle
|
|
24 |
from api.db.services.user_service import TenantService
|
25 |
from graphrag.community_reports_extractor import CommunityReportsExtractor
|
26 |
from graphrag.entity_resolution import EntityResolution
|
27 |
-
from graphrag.graph_extractor import GraphExtractor
|
28 |
from graphrag.mind_map_extractor import MindMapExtractor
|
29 |
from rag.nlp import rag_tokenizer
|
30 |
from rag.utils import num_tokens_from_string
|
@@ -52,7 +51,7 @@ def graph_merge(g1, g2):
|
|
52 |
return g
|
53 |
|
54 |
|
55 |
-
def
|
56 |
_, tenant = TenantService.get_by_id(tenant_id)
|
57 |
llm_bdl = LLMBundle(tenant_id, LLMType.CHAT, tenant.llm_id)
|
58 |
ext = GraphExtractor(llm_bdl)
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from concurrent.futures import ThreadPoolExecutor
|
17 |
import json
|
18 |
from functools import reduce
|
|
|
23 |
from api.db.services.user_service import TenantService
|
24 |
from graphrag.community_reports_extractor import CommunityReportsExtractor
|
25 |
from graphrag.entity_resolution import EntityResolution
|
26 |
+
from graphrag.graph_extractor import GraphExtractor, DEFAULT_ENTITY_TYPES
|
27 |
from graphrag.mind_map_extractor import MindMapExtractor
|
28 |
from rag.nlp import rag_tokenizer
|
29 |
from rag.utils import num_tokens_from_string
|
|
|
51 |
return g
|
52 |
|
53 |
|
54 |
+
def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, entity_types=DEFAULT_ENTITY_TYPES):
|
55 |
_, tenant = TenantService.get_by_id(tenant_id)
|
56 |
llm_bdl = LLMBundle(tenant_id, LLMType.CHAT, tenant.llm_id)
|
57 |
ext = GraphExtractor(llm_bdl)
|
rag/app/knowledge_graph.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import re
|
2 |
|
3 |
-
from graphrag.index import
|
4 |
from rag.app import naive
|
5 |
from rag.nlp import rag_tokenizer, tokenize_chunks
|
6 |
|
@@ -15,9 +15,9 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
|
|
15 |
parser_config["layout_recognize"] = False
|
16 |
sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
|
17 |
parser_config=parser_config, callback=callback)
|
18 |
-
chunks =
|
19 |
-
|
20 |
-
|
21 |
for c in chunks: c["docnm_kwd"] = filename
|
22 |
|
23 |
doc = {
|
|
|
1 |
import re
|
2 |
|
3 |
+
from graphrag.index import build_knowledge_graph_chunks
|
4 |
from rag.app import naive
|
5 |
from rag.nlp import rag_tokenizer, tokenize_chunks
|
6 |
|
|
|
15 |
parser_config["layout_recognize"] = False
|
16 |
sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
|
17 |
parser_config=parser_config, callback=callback)
|
18 |
+
chunks = build_knowledge_graph_chunks(tenant_id, sections, callback,
|
19 |
+
parser_config.get("entity_types", ["organization", "person", "location", "event", "time"])
|
20 |
+
)
|
21 |
for c in chunks: c["docnm_kwd"] = filename
|
22 |
|
23 |
doc = {
|
rag/llm/chat_model.py
CHANGED
@@ -20,7 +20,6 @@ from abc import ABC
|
|
20 |
from openai import OpenAI
|
21 |
import openai
|
22 |
from ollama import Client
|
23 |
-
from volcengine.maas.v2 import MaasService
|
24 |
from rag.nlp import is_english
|
25 |
from rag.utils import num_tokens_from_string
|
26 |
from groq import Groq
|
@@ -29,6 +28,7 @@ import json
|
|
29 |
import requests
|
30 |
import asyncio
|
31 |
|
|
|
32 |
class Base(ABC):
|
33 |
def __init__(self, key, model_name, base_url):
|
34 |
self.client = OpenAI(api_key=key, base_url=base_url)
|
|
|
20 |
from openai import OpenAI
|
21 |
import openai
|
22 |
from ollama import Client
|
|
|
23 |
from rag.nlp import is_english
|
24 |
from rag.utils import num_tokens_from_string
|
25 |
from groq import Groq
|
|
|
28 |
import requests
|
29 |
import asyncio
|
30 |
|
31 |
+
|
32 |
class Base(ABC):
|
33 |
def __init__(self, key, model_name, base_url):
|
34 |
self.client = OpenAI(api_key=key, base_url=base_url)
|
rag/utils/__init__.py
CHANGED
@@ -78,11 +78,9 @@ encoder = tiktoken.encoding_for_model("gpt-3.5-turbo")
|
|
78 |
def num_tokens_from_string(string: str) -> int:
|
79 |
"""Returns the number of tokens in a text string."""
|
80 |
try:
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
pass
|
85 |
-
return 0
|
86 |
|
87 |
|
88 |
def truncate(string: str, max_len: int) -> str:
|
|
|
78 |
def num_tokens_from_string(string: str) -> int:
|
79 |
"""Returns the number of tokens in a text string."""
|
80 |
try:
|
81 |
+
return len(encoder.encode(string))
|
82 |
+
except Exception:
|
83 |
+
return 0
|
|
|
|
|
84 |
|
85 |
|
86 |
def truncate(string: str, max_len: int) -> str:
|