yqkcn committed on
Commit
35bb186
·
1 Parent(s): 2570acc

style: fix typo and format code (#2618)

Browse files

### What problem does this PR solve?

- Fix typo
- Remove unused import
- Format code

### Type of change

- [x] Other (please describe): typo and format

api/db/services/llm_service.py CHANGED
@@ -169,8 +169,8 @@ class TenantLLMService(CommonService):
169
 
170
  num = 0
171
  try:
172
- for u in cls.query(tenant_id = tenant_id, llm_name=mdlnm):
173
- num += cls.model.update(used_tokens = u.used_tokens + used_tokens)\
174
  .where(cls.model.tenant_id == tenant_id, cls.model.llm_name == mdlnm)\
175
  .execute()
176
  except Exception as e:
@@ -252,7 +252,6 @@ class LLMBundle(object):
252
  return
253
  yield chunk
254
 
255
-
256
  def chat(self, system, history, gen_conf):
257
  txt, used_tokens = self.mdl.chat(system, history, gen_conf)
258
  if not TenantLLMService.increase_usage(
 
169
 
170
  num = 0
171
  try:
172
+ for u in cls.query(tenant_id=tenant_id, llm_name=mdlnm):
173
+ num += cls.model.update(used_tokens=u.used_tokens + used_tokens)\
174
  .where(cls.model.tenant_id == tenant_id, cls.model.llm_name == mdlnm)\
175
  .execute()
176
  except Exception as e:
 
252
  return
253
  yield chunk
254
 
 
255
  def chat(self, system, history, gen_conf):
256
  txt, used_tokens = self.mdl.chat(system, history, gen_conf)
257
  if not TenantLLMService.increase_usage(
graphrag/index.py CHANGED
@@ -13,7 +13,6 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
- import re
17
  from concurrent.futures import ThreadPoolExecutor
18
  import json
19
  from functools import reduce
@@ -24,7 +23,7 @@ from api.db.services.llm_service import LLMBundle
24
  from api.db.services.user_service import TenantService
25
  from graphrag.community_reports_extractor import CommunityReportsExtractor
26
  from graphrag.entity_resolution import EntityResolution
27
- from graphrag.graph_extractor import GraphExtractor
28
  from graphrag.mind_map_extractor import MindMapExtractor
29
  from rag.nlp import rag_tokenizer
30
  from rag.utils import num_tokens_from_string
@@ -52,7 +51,7 @@ def graph_merge(g1, g2):
52
  return g
53
 
54
 
55
- def build_knowlege_graph_chunks(tenant_id: str, chunks: List[str], callback, entity_types=["organization", "person", "location", "event", "time"]):
56
  _, tenant = TenantService.get_by_id(tenant_id)
57
  llm_bdl = LLMBundle(tenant_id, LLMType.CHAT, tenant.llm_id)
58
  ext = GraphExtractor(llm_bdl)
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from concurrent.futures import ThreadPoolExecutor
17
  import json
18
  from functools import reduce
 
23
  from api.db.services.user_service import TenantService
24
  from graphrag.community_reports_extractor import CommunityReportsExtractor
25
  from graphrag.entity_resolution import EntityResolution
26
+ from graphrag.graph_extractor import GraphExtractor, DEFAULT_ENTITY_TYPES
27
  from graphrag.mind_map_extractor import MindMapExtractor
28
  from rag.nlp import rag_tokenizer
29
  from rag.utils import num_tokens_from_string
 
51
  return g
52
 
53
 
54
+ def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, entity_types=DEFAULT_ENTITY_TYPES):
55
  _, tenant = TenantService.get_by_id(tenant_id)
56
  llm_bdl = LLMBundle(tenant_id, LLMType.CHAT, tenant.llm_id)
57
  ext = GraphExtractor(llm_bdl)
rag/app/knowledge_graph.py CHANGED
@@ -1,6 +1,6 @@
1
  import re
2
 
3
- from graphrag.index import build_knowlege_graph_chunks
4
  from rag.app import naive
5
  from rag.nlp import rag_tokenizer, tokenize_chunks
6
 
@@ -15,9 +15,9 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
15
  parser_config["layout_recognize"] = False
16
  sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
17
  parser_config=parser_config, callback=callback)
18
- chunks = build_knowlege_graph_chunks(tenant_id, sections, callback,
19
- parser_config.get("entity_types", ["organization", "person", "location", "event", "time"])
20
- )
21
  for c in chunks: c["docnm_kwd"] = filename
22
 
23
  doc = {
 
1
  import re
2
 
3
+ from graphrag.index import build_knowledge_graph_chunks
4
  from rag.app import naive
5
  from rag.nlp import rag_tokenizer, tokenize_chunks
6
 
 
15
  parser_config["layout_recognize"] = False
16
  sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
17
  parser_config=parser_config, callback=callback)
18
+ chunks = build_knowledge_graph_chunks(tenant_id, sections, callback,
19
+ parser_config.get("entity_types", ["organization", "person", "location", "event", "time"])
20
+ )
21
  for c in chunks: c["docnm_kwd"] = filename
22
 
23
  doc = {
rag/llm/chat_model.py CHANGED
@@ -20,7 +20,6 @@ from abc import ABC
20
  from openai import OpenAI
21
  import openai
22
  from ollama import Client
23
- from volcengine.maas.v2 import MaasService
24
  from rag.nlp import is_english
25
  from rag.utils import num_tokens_from_string
26
  from groq import Groq
@@ -29,6 +28,7 @@ import json
29
  import requests
30
  import asyncio
31
 
 
32
  class Base(ABC):
33
  def __init__(self, key, model_name, base_url):
34
  self.client = OpenAI(api_key=key, base_url=base_url)
 
20
  from openai import OpenAI
21
  import openai
22
  from ollama import Client
 
23
  from rag.nlp import is_english
24
  from rag.utils import num_tokens_from_string
25
  from groq import Groq
 
28
  import requests
29
  import asyncio
30
 
31
+
32
  class Base(ABC):
33
  def __init__(self, key, model_name, base_url):
34
  self.client = OpenAI(api_key=key, base_url=base_url)
rag/utils/__init__.py CHANGED
@@ -78,11 +78,9 @@ encoder = tiktoken.encoding_for_model("gpt-3.5-turbo")
78
  def num_tokens_from_string(string: str) -> int:
79
  """Returns the number of tokens in a text string."""
80
  try:
81
- num_tokens = len(encoder.encode(string))
82
- return num_tokens
83
- except Exception as e:
84
- pass
85
- return 0
86
 
87
 
88
  def truncate(string: str, max_len: int) -> str:
 
78
  def num_tokens_from_string(string: str) -> int:
79
  """Returns the number of tokens in a text string."""
80
  try:
81
+ return len(encoder.encode(string))
82
+ except Exception:
83
+ return 0
 
 
84
 
85
 
86
  def truncate(string: str, max_len: int) -> str: