Yingfeng committed on
Commit
f23a141
·
1 Parent(s): f05a941

Synchronize with enterprise version (#4325)

Browse files

### Type of change

- [x] Refactoring

agent/templates/customer_service.json CHANGED
@@ -336,7 +336,7 @@
336
  "parameters": [],
337
  "presencePenaltyEnabled": true,
338
  "presence_penalty": 0.4,
339
- "prompt": "Role: You are a customer support. \n\nTask: Please answer the question based on content of knowledge base. \n\nReuirements & restrictions:\n - DO NOT make things up when all knowledge base content is irrelevant to the question. \n - Answers need to consider chat history.\n - Request about customer's contact information like, Wechat number, LINE number, twitter, discord, etc,. , when knowlegebase content can't answer his question. So, product expert could contact him soon to solve his problem.\n\n Knowledge base content is as following:\n {input}\n The above is the content of knowledge base.",
340
  "temperature": 0.1,
341
  "temperatureEnabled": true,
342
  "topPEnabled": true,
@@ -603,7 +603,7 @@
603
  {
604
  "data": {
605
  "form": {
606
- "text": "Static messages.\nDefine replys after recieve user's contact information."
607
  },
608
  "label": "Note",
609
  "name": "N: What else?"
@@ -691,7 +691,7 @@
691
  {
692
  "data": {
693
  "form": {
694
- "text": "Complete questions by conversation history.\nUser: What's RAGFlow?\nAssistant: RAGFlow is xxx.\nUser: How to deloy it?\n\nRefine it: How to deploy RAGFlow?"
695
  },
696
  "label": "Note",
697
  "name": "N: Refine Question"
 
336
  "parameters": [],
337
  "presencePenaltyEnabled": true,
338
  "presence_penalty": 0.4,
339
+ "prompt": "Role: You are a customer support. \n\nTask: Please answer the question based on content of knowledge base. \n\nRequirements & restrictions:\n - DO NOT make things up when all knowledge base content is irrelevant to the question. \n - Answers need to consider chat history.\n - Request about customer's contact information like, Wechat number, LINE number, twitter, discord, etc,. , when knowledge base content can't answer his question. So, product expert could contact him soon to solve his problem.\n\n Knowledge base content is as following:\n {input}\n The above is the content of knowledge base.",
340
  "temperature": 0.1,
341
  "temperatureEnabled": true,
342
  "topPEnabled": true,
 
603
  {
604
  "data": {
605
  "form": {
606
+ "text": "Static messages.\nDefine response after receive user's contact information."
607
  },
608
  "label": "Note",
609
  "name": "N: What else?"
 
691
  {
692
  "data": {
693
  "form": {
694
+ "text": "Complete questions by conversation history.\nUser: What's RAGFlow?\nAssistant: RAGFlow is xxx.\nUser: How to deploy it?\n\nRefine it: How to deploy RAGFlow?"
695
  },
696
  "label": "Note",
697
  "name": "N: Refine Question"
rag/app/knowledge_graph.py CHANGED
@@ -9,7 +9,7 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
9
  lang="Chinese", callback=None, **kwargs):
10
  parser_config = kwargs.get(
11
  "parser_config", {
12
- "chunk_token_num": 512, "delimiter": "\n!?。;!?", "layout_recognize": True})
13
  eng = lang.lower() == "english"
14
 
15
  parser_config["layout_recognize"] = True
@@ -29,4 +29,4 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
29
  doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"])
30
  chunks.extend(tokenize_chunks(sections, doc, eng))
31
 
32
- return chunks
 
9
  lang="Chinese", callback=None, **kwargs):
10
  parser_config = kwargs.get(
11
  "parser_config", {
12
+ "chunk_token_num": 512, "delimiter": "\n!?;。;!?", "layout_recognize": True})
13
  eng = lang.lower() == "english"
14
 
15
  parser_config["layout_recognize"] = True
 
29
  doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"])
30
  chunks.extend(tokenize_chunks(sections, doc, eng))
31
 
32
+ return chunks
rag/app/manual.py CHANGED
@@ -256,7 +256,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
256
  res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser))
257
  return res
258
 
259
- elif re.search(r"\.docx$", filename, re.IGNORECASE):
260
  docx_parser = Docx()
261
  ti_list, tbls = docx_parser(filename, binary,
262
  from_page=0, to_page=10000, callback=callback)
@@ -279,4 +279,4 @@ if __name__ == "__main__":
279
  pass
280
 
281
 
282
- chunk(sys.argv[1], callback=dummy)
 
256
  res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser))
257
  return res
258
 
259
+ elif re.search(r"\.docx?$", filename, re.IGNORECASE):
260
  docx_parser = Docx()
261
  ti_list, tbls = docx_parser(filename, binary,
262
  from_page=0, to_page=10000, callback=callback)
 
279
  pass
280
 
281
 
282
+ chunk(sys.argv[1], callback=dummy)
rag/llm/chat_model.py CHANGED
@@ -24,7 +24,6 @@ import openai
24
  from ollama import Client
25
  from rag.nlp import is_chinese, is_english
26
  from rag.utils import num_tokens_from_string
27
- from groq import Groq
28
  import os
29
  import json
30
  import requests
@@ -840,6 +839,7 @@ class GeminiChat(Base):
840
 
841
  class GroqChat:
842
  def __init__(self, key, model_name, base_url=''):
 
843
  self.client = Groq(api_key=key)
844
  self.model_name = model_name
845
 
 
24
  from ollama import Client
25
  from rag.nlp import is_chinese, is_english
26
  from rag.utils import num_tokens_from_string
 
27
  import os
28
  import json
29
  import requests
 
839
 
840
  class GroqChat:
841
  def __init__(self, key, model_name, base_url=''):
842
+ from groq import Groq
843
  self.client = Groq(api_key=key)
844
  self.model_name = model_name
845
 
rag/llm/tts_model.py CHANGED
@@ -299,8 +299,6 @@ class SparkTTS:
299
  yield audio_chunk
300
 
301
 
302
-
303
-
304
  class XinferenceTTS:
305
  def __init__(self, key, model_name, **kwargs):
306
  self.base_url = kwargs.get("base_url", None)
@@ -330,3 +328,30 @@ class XinferenceTTS:
330
  for chunk in response.iter_content(chunk_size=1024):
331
  if chunk:
332
  yield chunk
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  yield audio_chunk
300
 
301
 
 
 
302
  class XinferenceTTS:
303
  def __init__(self, key, model_name, **kwargs):
304
  self.base_url = kwargs.get("base_url", None)
 
328
  for chunk in response.iter_content(chunk_size=1024):
329
  if chunk:
330
  yield chunk
331
+
332
+
333
+ class OllamaTTS(Base):
334
+ def __init__(self, key, model_name="ollama-tts", base_url="https://api.ollama.ai/v1"):
335
+ if not base_url:
336
+ base_url = "https://api.ollama.ai/v1"
337
+ self.model_name = model_name
338
+ self.base_url = base_url
339
+ self.headers = {
340
+ "Content-Type": "application/json"
341
+ }
342
+
343
+ def tts(self, text, voice="standard-voice"):
344
+ payload = {
345
+ "model": self.model_name,
346
+ "voice": voice,
347
+ "input": text
348
+ }
349
+
350
+ response = requests.post(f"{self.base_url}/audio/tts", headers=self.headers, json=payload, stream=True)
351
+
352
+ if response.status_code != 200:
353
+ raise Exception(f"**Error**: {response.status_code}, {response.text}")
354
+
355
+ for chunk in response.iter_content():
356
+ if chunk:
357
+ yield chunk
rag/svr/cache_file_svr.py CHANGED
@@ -1,60 +1,60 @@
1
- #
2
- # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
- #
16
- import logging
17
- import time
18
- import traceback
19
-
20
- from api.db.db_models import close_connection
21
- from api.db.services.task_service import TaskService
22
- from rag.utils.storage_factory import STORAGE_IMPL
23
- from rag.utils.redis_conn import REDIS_CONN
24
-
25
-
26
- def collect():
27
- doc_locations = TaskService.get_ongoing_doc_name()
28
- logging.debug(doc_locations)
29
- if len(doc_locations) == 0:
30
- time.sleep(1)
31
- return
32
- return doc_locations
33
-
34
- def main():
35
- locations = collect()
36
- if not locations:
37
- return
38
- logging.info(f"TASKS: {len(locations)}")
39
- for kb_id, loc in locations:
40
- try:
41
- if REDIS_CONN.is_alive():
42
- try:
43
- key = "{}/{}".format(kb_id, loc)
44
- if REDIS_CONN.exist(key):
45
- continue
46
- file_bin = STORAGE_IMPL.get(kb_id, loc)
47
- REDIS_CONN.transaction(key, file_bin, 12 * 60)
48
- logging.info("CACHE: {}".format(loc))
49
- except Exception as e:
50
- traceback.print_stack(e)
51
- except Exception as e:
52
- traceback.print_stack(e)
53
-
54
-
55
-
56
- if __name__ == "__main__":
57
- while True:
58
- main()
59
- close_connection()
60
  time.sleep(1)
 
1
+ #
2
+ # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ import logging
17
+ import time
18
+ import traceback
19
+
20
+ from api.db.db_models import close_connection
21
+ from api.db.services.task_service import TaskService
22
+ from rag.utils.minio_conn import MINIOs
23
+ from rag.utils.redis_conn import REDIS_CONN
24
+
25
+
26
+ def collect():
27
+ doc_locations = TaskService.get_ongoing_doc_name()
28
+ logging.debug(doc_locations)
29
+ if len(doc_locations) == 0:
30
+ time.sleep(1)
31
+ return
32
+ return doc_locations
33
+
34
+
35
+ def main():
36
+ locations = collect()
37
+ if not locations:
38
+ return
39
+ logging.info(f"TASKS: {len(locations)}")
40
+ for kb_id, loc in locations:
41
+ try:
42
+ if REDIS_CONN.is_alive():
43
+ try:
44
+ key = "{}/{}".format(kb_id, loc)
45
+ if REDIS_CONN.exist(key):
46
+ continue
47
+ file_bin = MINIOs.get(kb_id, loc)
48
+ REDIS_CONN.transaction(key, file_bin, 12 * 60)
49
+ logging.info("CACHE: {}".format(loc))
50
+ except Exception as e:
51
+ traceback.print_stack(e)
52
+ except Exception as e:
53
+ traceback.print_stack(e)
54
+
55
+
56
+ if __name__ == "__main__":
57
+ while True:
58
+ main()
59
+ close_connection()
60
  time.sleep(1)