黄腾 aopstudio Kevin Hu committed on
Commit
e10ed78
·
1 Parent(s): 2324bcb

add support for TTS model (#2095)

Browse files

### What problem does this PR solve?

add support for TTS model
#1853

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Zhedong Cen <[email protected]>
Co-authored-by: Kevin Hu <[email protected]>

api/apps/llm_app.py CHANGED
@@ -20,7 +20,7 @@ from api.utils.api_utils import server_error_response, get_data_error_result, va
20
  from api.db import StatusEnum, LLMType
21
  from api.db.db_models import TenantLLM
22
  from api.utils.api_utils import get_json_result
23
- from rag.llm import EmbeddingModel, ChatModel, RerankModel,CvModel
24
  import requests
25
  import ast
26
 
@@ -142,6 +142,10 @@ def add_llm():
142
  llm_name = req["llm_name"]
143
  api_key = '{' + f'"yiyan_ak": "{req.get("yiyan_ak", "")}", ' \
144
  f'"yiyan_sk": "{req.get("yiyan_sk", "")}"' + '}'
 
 
 
 
145
  else:
146
  llm_name = req["llm_name"]
147
  api_key = req.get("api_key","xxxxxxxxxxxxxxx")
@@ -215,6 +219,15 @@ def add_llm():
215
  pass
216
  except Exception as e:
217
  msg += f"\nFail to access model({llm['llm_name']})." + str(e)
 
 
 
 
 
 
 
 
 
218
  else:
219
  # TODO: check other type of models
220
  pass
 
20
  from api.db import StatusEnum, LLMType
21
  from api.db.db_models import TenantLLM
22
  from api.utils.api_utils import get_json_result
23
+ from rag.llm import EmbeddingModel, ChatModel, RerankModel, CvModel, TTSModel
24
  import requests
25
  import ast
26
 
 
142
  llm_name = req["llm_name"]
143
  api_key = '{' + f'"yiyan_ak": "{req.get("yiyan_ak", "")}", ' \
144
  f'"yiyan_sk": "{req.get("yiyan_sk", "")}"' + '}'
145
+ elif factory == "Fish Audio":
146
+ llm_name = req["llm_name"]
147
+ api_key = '{' + f'"fish_audio_ak": "{req.get("fish_audio_ak", "")}", ' \
148
+ f'"fish_audio_refid": "{req.get("fish_audio_refid", "59cb5986671546eaa6ca8ae6f29f6d22")}"' + '}'
149
  else:
150
  llm_name = req["llm_name"]
151
  api_key = req.get("api_key","xxxxxxxxxxxxxxx")
 
219
  pass
220
  except Exception as e:
221
  msg += f"\nFail to access model({llm['llm_name']})." + str(e)
222
+ elif llm["model_type"] == LLMType.TTS:
223
+ mdl = TTSModel[factory](
224
+ key=llm["api_key"], model_name=llm["llm_name"], base_url=llm["api_base"]
225
+ )
226
+ try:
227
+ for resp in mdl.transcription("Hello~ Ragflower!"):
228
+ pass
229
+ except RuntimeError as e:
230
+ msg += f"\nFail to access model({llm['llm_name']})." + str(e)
231
  else:
232
  # TODO: check other type of models
233
  pass
api/apps/user_app.py CHANGED
@@ -410,7 +410,7 @@ def tenant_info():
410
 
411
  @manager.route("/set_tenant_info", methods=["POST"])
412
  @login_required
413
- @validate_request("tenant_id", "asr_id", "embd_id", "img2txt_id", "llm_id")
414
  def set_tenant_info():
415
  req = request.json
416
  try:
 
410
 
411
  @manager.route("/set_tenant_info", methods=["POST"])
412
  @login_required
413
+ @validate_request("tenant_id", "asr_id", "embd_id", "img2txt_id", "llm_id", "tts_id")
414
  def set_tenant_info():
415
  req = request.json
416
  try:
api/db/__init__.py CHANGED
@@ -55,6 +55,7 @@ class LLMType(StrEnum):
55
  SPEECH2TEXT = 'speech2text'
56
  IMAGE2TEXT = 'image2text'
57
  RERANK = 'rerank'
 
58
 
59
 
60
  class ChatStyle(StrEnum):
 
55
  SPEECH2TEXT = 'speech2text'
56
  IMAGE2TEXT = 'image2text'
57
  RERANK = 'rerank'
58
+ TTS = 'tts'
59
 
60
 
61
  class ChatStyle(StrEnum):
api/db/db_models.py CHANGED
@@ -449,6 +449,11 @@ class Tenant(DataBaseModel):
449
  null=False,
450
  help_text="default rerank model ID",
451
  index=True)
 
 
 
 
 
452
  parser_ids = CharField(
453
  max_length=256,
454
  null=False,
@@ -958,6 +963,13 @@ def migrate_db():
958
  )
959
  except Exception as e:
960
  pass
 
 
 
 
 
 
 
961
  try:
962
  migrate(
963
  migrator.add_column('api_4_conversation', 'source',
 
449
  null=False,
450
  help_text="default rerank model ID",
451
  index=True)
452
+ tts_id = CharField(
453
+ max_length=256,
454
+ null=True,
455
+ help_text="default tts model ID",
456
+ index=True)
457
  parser_ids = CharField(
458
  max_length=256,
459
  null=False,
 
963
  )
964
  except Exception as e:
965
  pass
966
+ try:
967
+ migrate(
968
+ migrator.add_column("tenant","tts_id",
969
+ CharField(max_length=256,null=True,help_text="default tts model ID",index=True))
970
+ )
971
+ except Exception as e:
972
+ pass
973
  try:
974
  migrate(
975
  migrator.add_column('api_4_conversation', 'source',
api/db/services/llm_service.py CHANGED
@@ -15,7 +15,7 @@
15
  #
16
  from api.db.services.user_service import TenantService
17
  from api.settings import database_logger
18
- from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel
19
  from api.db import LLMType
20
  from api.db.db_models import DB, UserTenant
21
  from api.db.db_models import LLMFactories, LLM, TenantLLM
@@ -75,6 +75,8 @@ class TenantLLMService(CommonService):
75
  mdlnm = tenant.llm_id if not llm_name else llm_name
76
  elif llm_type == LLMType.RERANK:
77
  mdlnm = tenant.rerank_id if not llm_name else llm_name
 
 
78
  else:
79
  assert False, "LLM type error"
80
 
@@ -127,6 +129,14 @@ class TenantLLMService(CommonService):
127
  model_config["api_key"], model_config["llm_name"], lang,
128
  base_url=model_config["api_base"]
129
  )
 
 
 
 
 
 
 
 
130
 
131
  @classmethod
132
  @DB.connection_context()
@@ -144,7 +154,9 @@ class TenantLLMService(CommonService):
144
  elif llm_type == LLMType.CHAT.value:
145
  mdlnm = tenant.llm_id if not llm_name else llm_name
146
  elif llm_type == LLMType.RERANK:
147
- mdlnm = tenant.llm_id if not llm_name else llm_name
 
 
148
  else:
149
  assert False, "LLM type error"
150
 
 
15
  #
16
  from api.db.services.user_service import TenantService
17
  from api.settings import database_logger
18
+ from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel, TTSModel
19
  from api.db import LLMType
20
  from api.db.db_models import DB, UserTenant
21
  from api.db.db_models import LLMFactories, LLM, TenantLLM
 
75
  mdlnm = tenant.llm_id if not llm_name else llm_name
76
  elif llm_type == LLMType.RERANK:
77
  mdlnm = tenant.rerank_id if not llm_name else llm_name
78
+ elif llm_type == LLMType.TTS:
79
+ mdlnm = tenant.tts_id if not llm_name else llm_name
80
  else:
81
  assert False, "LLM type error"
82
 
 
129
  model_config["api_key"], model_config["llm_name"], lang,
130
  base_url=model_config["api_base"]
131
  )
132
+ if llm_type == LLMType.TTS:
133
+ if model_config["llm_factory"] not in TTSModel:
134
+ return
135
+ return TTSModel[model_config["llm_factory"]](
136
+ model_config["api_key"],
137
+ model_config["llm_name"],
138
+ base_url=model_config["api_base"],
139
+ )
140
 
141
  @classmethod
142
  @DB.connection_context()
 
154
  elif llm_type == LLMType.CHAT.value:
155
  mdlnm = tenant.llm_id if not llm_name else llm_name
156
  elif llm_type == LLMType.RERANK:
157
+ mdlnm = tenant.rerank_id if not llm_name else llm_name
158
+ elif llm_type == LLMType.TTS:
159
+ mdlnm = tenant.tts_id if not llm_name else llm_name
160
  else:
161
  assert False, "LLM type error"
162
 
conf/llm_factories.json CHANGED
@@ -3214,6 +3214,13 @@
3214
  "tags": "LLM",
3215
  "status": "1",
3216
  "llm": []
 
 
 
 
 
 
 
3217
  }
3218
  ]
3219
  }
 
3214
  "tags": "LLM",
3215
  "status": "1",
3216
  "llm": []
3217
+ },
3218
+ {
3219
+ "name": "Fish Audio",
3220
+ "logo": "",
3221
+ "tags": "TTS",
3222
+ "status": "1",
3223
+ "llm": []
3224
  }
3225
  ]
3226
  }
rag/llm/__init__.py CHANGED
@@ -18,6 +18,7 @@ from .chat_model import *
18
  from .cv_model import *
19
  from .rerank_model import *
20
  from .sequence2txt_model import *
 
21
 
22
  EmbeddingModel = {
23
  "Ollama": OllamaEmbed,
@@ -129,3 +130,7 @@ Seq2txtModel = {
129
  "Azure-OpenAI": AzureSeq2txt,
130
  "Xinference": XinferenceSeq2txt
131
  }
 
 
 
 
 
18
  from .cv_model import *
19
  from .rerank_model import *
20
  from .sequence2txt_model import *
21
+ from .tts_model import *
22
 
23
  EmbeddingModel = {
24
  "Ollama": OllamaEmbed,
 
130
  "Azure-OpenAI": AzureSeq2txt,
131
  "Xinference": XinferenceSeq2txt
132
  }
133
+
134
+ TTSModel = {
135
+ "Fish Audio": FishAudioTTS
136
+ }
rag/llm/tts_model.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+
17
+ from typing import Annotated, Literal
18
+ from abc import ABC
19
+ import httpx
20
+ import ormsgpack
21
+ from pydantic import BaseModel, conint
22
+ from rag.utils import num_tokens_from_string
23
+ import json
24
+
25
+
26
class ServeReferenceAudio(BaseModel):
    """One reference sample carried in ``ServeTTSRequest.references``."""

    # Raw bytes of the reference audio clip.
    audio: bytes
    # Text paired with the reference audio clip.
    text: str
29
+
30
+
31
class ServeTTSRequest(BaseModel):
    """Request payload for a text-to-speech call.

    ``FishAudioTTS.transcription`` builds one of these and serializes it with
    ormsgpack before posting it to the service.
    """

    # Text to synthesize.
    text: str
    # Chunk length, constrained to the inclusive range 100-300.
    chunk_length: Annotated[int, conint(ge=100, le=300, strict=True)] = 200
    # Output audio format.
    format: Literal["wav", "pcm", "mp3"] = "mp3"
    # Bitrate used for mp3 output.
    mp3_bitrate: Literal[64, 128, 192] = 128
    # Reference audio samples for in-context learning.
    references: list[ServeReferenceAudio] = []
    # Reference (voice model) id. For example, to use
    # https://fish.audio/m/7f92f8afb8ec43bf81429cc1c9199cb1/
    # pass just 7f92f8afb8ec43bf81429cc1c9199cb1.
    reference_id: str | None = None
    # Normalize text for en & zh; this increases stability for numbers.
    normalize: bool = True
    # "balanced" reduces latency to about 300ms but may decrease stability.
    latency: Literal["normal", "balanced"] = "normal"
47
+
48
+
49
class Base(ABC):
    """Common base for the text-to-speech backends defined in this module.

    Both methods are no-op placeholders; concrete backends such as
    ``FishAudioTTS`` override them.
    """

    def __init__(self, key, model_name, base_url):
        """Accept the shared constructor arguments; nothing is stored here."""

    def transcription(self, audio):
        """Placeholder implementation that does nothing and returns ``None``."""
        return None
55
+
56
+
57
class FishAudioTTS(Base):
    """Text-to-speech backend that streams audio from the Fish Audio HTTP API.

    The ``key`` passed to the constructor is a JSON object (as a string) with
    two entries: ``fish_audio_ak`` (the API key) and ``fish_audio_refid``
    (the reference/voice id to synthesize with).
    """

    def __init__(self, key, model_name, base_url="https://api.fish.audio/v1/tts"):
        """Parse the JSON credentials and prepare the request headers.

        Args:
            key: JSON string holding ``fish_audio_ak`` and ``fish_audio_refid``.
            model_name: Accepted for signature parity with the other model
                wrappers; not used by this backend.
            base_url: TTS endpoint. A falsy value (``None`` or ``""``) falls
                back to the public Fish Audio endpoint.

        Raises:
            json.JSONDecodeError: If ``key`` is not valid JSON.
        """
        if not base_url:
            base_url = "https://api.fish.audio/v1/tts"
        credentials = json.loads(key)
        self.headers = {
            "api-key": credentials.get("fish_audio_ak"),
            "content-type": "application/msgpack",
        }
        self.ref_id = credentials.get("fish_audio_refid")
        self.base_url = base_url

    def transcription(self, text):
        """Synthesize ``text`` and stream the resulting audio.

        This is a generator. It yields the audio as ``bytes`` chunks while the
        HTTP response streams in, and, once the stream has finished, yields
        one final ``int``: the token count of ``text``.

        Args:
            text: The text to convert to speech.

        Raises:
            RuntimeError: If the request fails, either because the service
                answered with an error status or because of a transport-level
                failure (connection error, protocol error, ...).
        """
        from http import HTTPStatus

        request = ServeTTSRequest(text=text, reference_id=self.ref_id)

        with httpx.Client() as client:
            try:
                with client.stream(
                    method="POST",
                    url=self.base_url,
                    content=ormsgpack.packb(
                        request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC
                    ),
                    headers=self.headers,
                    # No timeout: synthesis of long text may stream for a while.
                    timeout=None,
                ) as response:
                    if response.status_code == HTTPStatus.OK:
                        yield from response.iter_bytes()
                    else:
                        response.raise_for_status()

                yield num_tokens_from_string(text)

            except httpx.HTTPError as e:
                # httpx.HTTPError covers both HTTPStatusError and transport
                # errors, so callers that only handle RuntimeError also see
                # connection failures instead of a raw httpx exception.
                raise RuntimeError(f"**ERROR**: {e}") from e
requirements.txt CHANGED
@@ -47,6 +47,7 @@ openai==1.12.0
47
  opencv_python==4.9.0.80
48
  opencv_python_headless==4.9.0.80
49
  openpyxl==3.1.2
 
50
  pandas==2.2.2
51
  pdfplumber==0.10.4
52
  peewee==3.17.1
 
47
  opencv_python==4.9.0.80
48
  opencv_python_headless==4.9.0.80
49
  openpyxl==3.1.2
50
+ ormsgpack==1.5.0
51
  pandas==2.2.2
52
  pdfplumber==0.10.4
53
  peewee==3.17.1
requirements_arm.txt CHANGED
@@ -74,6 +74,7 @@ ollama==0.1.9
74
  openai==1.12.0
75
  opencv-python==4.9.0.80
76
  openpyxl==3.1.2
 
77
  packaging==23.2
78
  pandas==2.2.1
79
  pdfminer.six==20221105
 
74
  openai==1.12.0
75
  opencv-python==4.9.0.80
76
  openpyxl==3.1.2
77
+ ormsgpack==1.5.0
78
  packaging==23.2
79
  pandas==2.2.1
80
  pdfminer.six==20221105
web/src/assets/svg/llm/fish-audio.svg ADDED
web/src/constants/knowledge.ts CHANGED
@@ -48,6 +48,7 @@ export enum LlmModelType {
48
  Image2text = 'image2text',
49
  Speech2text = 'speech2text',
50
  Rerank = 'rerank',
 
51
  }
52
 
53
  export enum KnowledgeSearchParams {
 
48
  Image2text = 'image2text',
49
  Speech2text = 'speech2text',
50
  Rerank = 'rerank',
51
+ TTS = 'tts',
52
  }
53
 
54
  export enum KnowledgeSearchParams {
web/src/hooks/llm-hooks.ts CHANGED
@@ -87,6 +87,7 @@ export const useSelectLlmOptionsByModelType = () => {
87
  LlmModelType.Speech2text,
88
  ),
89
  [LlmModelType.Rerank]: groupOptionsByModelType(LlmModelType.Rerank),
 
90
  };
91
  };
92
 
 
87
  LlmModelType.Speech2text,
88
  ),
89
  [LlmModelType.Rerank]: groupOptionsByModelType(LlmModelType.Rerank),
90
+ [LlmModelType.TTS]: groupOptionsByModelType(LlmModelType.TTS),
91
  };
92
  };
93
 
web/src/interfaces/database/knowledge.ts CHANGED
@@ -71,6 +71,7 @@ export interface ITenantInfo {
71
  tenant_id: string;
72
  chat_id: string;
73
  speech2text_id: string;
 
74
  }
75
 
76
  export interface IChunk {
 
71
  tenant_id: string;
72
  chat_id: string;
73
  speech2text_id: string;
74
+ tts_id: string;
75
  }
76
 
77
  export interface IChunk {
web/src/locales/en.ts CHANGED
@@ -490,6 +490,9 @@ The above is the content you need to summarize.`,
490
  'The default ASR model all the newly created knowledgebase will use. Use this model to translate voices to corresponding text.',
491
  rerankModel: 'Rerank Model',
492
  rerankModelTip: `The default rerank model is used to rerank chunks retrieved by users' questions.`,
 
 
 
493
  workspace: 'Workspace',
494
  upgrade: 'Upgrade',
495
  addLlmTitle: 'Add LLM',
@@ -502,6 +505,7 @@ The above is the content you need to summarize.`,
502
  baseUrlNameMessage: 'Please input your base url!',
503
  vision: 'Does it support Vision?',
504
  ollamaLink: 'How to integrate {{name}}',
 
505
  volcModelNameMessage: 'Please input your model name!',
506
  addEndpointID: 'EndpointID of the model',
507
  endpointIDMessage: 'Please input your EndpointID of the model',
@@ -533,6 +537,13 @@ The above is the content you need to summarize.`,
533
  yiyanAKMessage: 'Please input your API KEY',
534
  addyiyanSK: 'yiyan Secret KEY',
535
  yiyanSKMessage: 'Please input your Secret KEY',
 
 
 
 
 
 
 
536
  },
537
  message: {
538
  registered: 'Registered!',
 
490
  'The default ASR model all the newly created knowledgebase will use. Use this model to translate voices to corresponding text.',
491
  rerankModel: 'Rerank Model',
492
  rerankModelTip: `The default rerank model is used to rerank chunks retrieved by users' questions.`,
493
+ ttsModel: 'TTS Model',
494
+ ttsModelTip:
495
+ 'The default TTS model will be used to generate speech during conversations upon request.',
496
  workspace: 'Workspace',
497
  upgrade: 'Upgrade',
498
  addLlmTitle: 'Add LLM',
 
505
  baseUrlNameMessage: 'Please input your base url!',
506
  vision: 'Does it support Vision?',
507
  ollamaLink: 'How to integrate {{name}}',
508
+ FishAudioLink: 'How to use FishAudio',
509
  volcModelNameMessage: 'Please input your model name!',
510
  addEndpointID: 'EndpointID of the model',
511
  endpointIDMessage: 'Please input your EndpointID of the model',
 
537
  yiyanAKMessage: 'Please input your API KEY',
538
  addyiyanSK: 'yiyan Secret KEY',
539
  yiyanSKMessage: 'Please input your Secret KEY',
540
+ FishAudioModelNameMessage:
541
+ 'Please give your speech synthesis model a name',
542
+ addFishAudioAK: 'Fish Audio API KEY',
543
+ addFishAudioAKMessage: 'Please input your API KEY',
544
+ addFishAudioRefID: 'FishAudio Refrence ID',
545
+ addFishAudioRefIDMessage:
546
+ 'Please input the Reference ID (leave blank to use the default model).',
547
  },
548
  message: {
549
  registered: 'Registered!',
web/src/locales/zh-traditional.ts CHANGED
@@ -443,6 +443,8 @@ export default {
443
  systemModelSettings: '系統模型設置',
444
  chatModel: '聊天模型',
445
  chatModelTip: '所有新創建的知識庫都會使用默認的聊天LLM。',
 
 
446
  embeddingModel: '嵌入模型',
447
  embeddingModelTip: '所有新創建的知識庫都將使用的默認嵌入模型。',
448
  img2txtModel: 'img2Txt模型',
@@ -465,6 +467,7 @@ export default {
465
  modelTypeMessage: '請輸入模型類型!',
466
  baseUrlNameMessage: '請輸入基礎 Url!',
467
  ollamaLink: '如何集成 {{name}}',
 
468
  volcModelNameMessage: '請輸入模型名稱!',
469
  addEndpointID: '模型 EndpointID',
470
  endpointIDMessage: '請輸入模型對應的EndpointID',
@@ -496,6 +499,10 @@ export default {
496
  yiyanAKMessage: '請輸入 API KEY',
497
  addyiyanSK: '一言 Secret KEY',
498
  yiyanSKMessage: '請輸入 Secret KEY',
 
 
 
 
499
  },
500
  message: {
501
  registered: '註冊成功',
 
443
  systemModelSettings: '系統模型設置',
444
  chatModel: '聊天模型',
445
  chatModelTip: '所有新創建的知識庫都會使用默認的聊天LLM。',
446
+ ttsModel: '語音合成模型',
447
+ ttsModelTip: '默認的tts模型會被用於在對話過程中請求語音生成時使用。',
448
  embeddingModel: '嵌入模型',
449
  embeddingModelTip: '所有新創建的知識庫都將使用的默認嵌入模型。',
450
  img2txtModel: 'img2Txt模型',
 
467
  modelTypeMessage: '請輸入模型類型!',
468
  baseUrlNameMessage: '請輸入基礎 Url!',
469
  ollamaLink: '如何集成 {{name}}',
470
+ FishAudioLink: '如何使用Fish Audio',
471
  volcModelNameMessage: '請輸入模型名稱!',
472
  addEndpointID: '模型 EndpointID',
473
  endpointIDMessage: '請輸入模型對應的EndpointID',
 
499
  yiyanAKMessage: '請輸入 API KEY',
500
  addyiyanSK: '一言 Secret KEY',
501
  yiyanSKMessage: '請輸入 Secret KEY',
502
+ addFishAudioAK: 'Fish Audio API KEY',
503
+ addFishAudioAKMessage: '請輸入 API KEY',
504
+ addFishAudioRefID: 'FishAudio Refrence ID',
505
+ addFishAudioRefIDMessage: '請輸入引用模型的ID(留空表示使用默認模型)',
506
  },
507
  message: {
508
  registered: '註冊成功',
web/src/locales/zh.ts CHANGED
@@ -460,6 +460,8 @@ export default {
460
  systemModelSettings: '系统模型设置',
461
  chatModel: '聊天模型',
462
  chatModelTip: '所有新创建的知识库都会使用默认的聊天LLM。',
 
 
463
  embeddingModel: '嵌入模型',
464
  embeddingModelTip: '所有新创建的知识库都将使用的默认嵌入模型。',
465
  img2txtModel: 'Img2txt模型',
@@ -482,6 +484,7 @@ export default {
482
  modelTypeMessage: '请输入模型类型!',
483
  baseUrlNameMessage: '请输入基础 Url!',
484
  ollamaLink: '如何集成 {{name}}',
 
485
  volcModelNameMessage: '请输入模型名称!',
486
  addEndpointID: '模型 EndpointID',
487
  endpointIDMessage: '请输入模型对应的EndpointID',
@@ -513,6 +516,10 @@ export default {
513
  yiyanAKMessage: '请输入 API KEY',
514
  addyiyanSK: '一言 Secret KEY',
515
  yiyanSKMessage: '请输入 Secret KEY',
 
 
 
 
516
  },
517
  message: {
518
  registered: '注册成功',
 
460
  systemModelSettings: '系统模型设置',
461
  chatModel: '聊天模型',
462
  chatModelTip: '所有新创建的知识库都会使用默认的聊天LLM。',
463
+ ttsModel: 'TTS模型',
464
+ ttsModelTip: '默认的tts模型会被用于在对话过程中请求语音生成时使用',
465
  embeddingModel: '嵌入模型',
466
  embeddingModelTip: '所有新创建的知识库都将使用的默认嵌入模型。',
467
  img2txtModel: 'Img2txt模型',
 
484
  modelTypeMessage: '请输入模型类型!',
485
  baseUrlNameMessage: '请输入基础 Url!',
486
  ollamaLink: '如何集成 {{name}}',
487
+ FishAudioLink: '如何使用Fish Audio',
488
  volcModelNameMessage: '请输入模型名称!',
489
  addEndpointID: '模型 EndpointID',
490
  endpointIDMessage: '请输入模型对应的EndpointID',
 
516
  yiyanAKMessage: '请输入 API KEY',
517
  addyiyanSK: '一言 Secret KEY',
518
  yiyanSKMessage: '请输入 Secret KEY',
519
+ addFishAudioAK: 'Fish Audio API KEY',
520
+ FishAudioAKMessage: '请输入 API KEY',
521
+ addFishAudioRefID: 'FishAudio Refrence ID',
522
+ FishAudioRefIDMessage: '请输入引用模型的ID(留空表示使用默认模型)',
523
  },
524
  message: {
525
  registered: '注册成功',
web/src/pages/user-setting/setting-model/constant.ts CHANGED
@@ -35,6 +35,7 @@ export const IconMap = {
35
  'Tencent Hunyuan': 'hunyuan',
36
  'XunFei Spark': 'spark',
37
  BaiduYiyan: 'yiyan',
 
38
  };
39
 
40
  export const BedrockRegionList = [
 
35
  'Tencent Hunyuan': 'hunyuan',
36
  'XunFei Spark': 'spark',
37
  BaiduYiyan: 'yiyan',
38
+ 'Fish Audio': 'fish-audio',
39
  };
40
 
41
  export const BedrockRegionList = [
web/src/pages/user-setting/setting-model/fish-audio-modal/index.tsx ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useTranslate } from '@/hooks/common-hooks';
2
+ import { IModalProps } from '@/interfaces/common';
3
+ import { IAddLlmRequestBody } from '@/interfaces/request/llm';
4
+ import { Flex, Form, Input, Modal, Select, Space } from 'antd';
5
+ import omit from 'lodash/omit';
6
+
7
+ type FieldType = IAddLlmRequestBody & {
8
+ fish_audio_ak: string;
9
+ fish_audio_refid: string;
10
+ };
11
+
12
+ const { Option } = Select;
13
+
14
/**
 * Modal dialog for registering a Fish Audio text-to-speech model.
 *
 * Collects the model type (always `tts`), a display name for the model, the
 * Fish Audio API key and an optional reference id, then hands the values plus
 * `llm_factory` to `onOk`.
 *
 * The credential fields are named `fish_audio_ak` / `fish_audio_refid` so
 * that they match `FieldType` and the keys the backend reads from the request.
 *
 * NOTE(review): the message keys `FishAudioAKMessage` / `FishAudioRefIDMessage`
 * exist in zh.ts, while en.ts and zh-traditional.ts define
 * `addFishAudioAKMessage` / `addFishAudioRefIDMessage` — the locale files
 * should be aligned on one spelling.
 */
const FishAudioModal = ({
  visible,
  hideModal,
  onOk,
  loading,
  llmFactory,
}: IModalProps<IAddLlmRequestBody> & { llmFactory: string }) => {
  const [form] = Form.useForm<FieldType>();

  const { t } = useTranslate('setting');

  // Validate the form and submit the values. The payload contains the API
  // key, so it is deliberately not written to the console.
  const handleOk = async () => {
    const values = await form.validateFields();
    const modelType = values.model_type;

    const data = {
      // omit() without paths just produces a shallow copy of the values.
      ...omit(values),
      model_type: modelType,
      llm_factory: llmFactory,
    };

    onOk?.(data);
  };

  return (
    <Modal
      title={t('addLlmTitle', { name: llmFactory })}
      open={visible}
      onOk={handleOk}
      onCancel={hideModal}
      okButtonProps={{ loading }}
      footer={(originNode: React.ReactNode) => {
        return (
          <Flex justify={'space-between'}>
            <a href={`https://fish.audio`} target="_blank" rel="noreferrer">
              {t('FishAudioLink')}
            </a>
            <Space>{originNode}</Space>
          </Flex>
        );
      }}
      confirmLoading={loading}
    >
      <Form
        name="basic"
        style={{ maxWidth: 600 }}
        autoComplete="off"
        layout={'vertical'}
        form={form}
      >
        <Form.Item<FieldType>
          label={t('modelType')}
          name="model_type"
          initialValue={'tts'}
          rules={[{ required: true, message: t('modelTypeMessage') }]}
        >
          <Select placeholder={t('modelTypeMessage')}>
            <Option value="tts">tts</Option>
          </Select>
        </Form.Item>
        <Form.Item<FieldType>
          label={t('modelName')}
          name="llm_name"
          rules={[{ required: true, message: t('FishAudioModelNameMessage') }]}
        >
          <Input placeholder={t('FishAudioModelNameMessage')} />
        </Form.Item>
        <Form.Item<FieldType>
          label={t('addFishAudioAK')}
          name="fish_audio_ak"
          rules={[{ required: true, message: t('FishAudioAKMessage') }]}
        >
          <Input placeholder={t('FishAudioAKMessage')} />
        </Form.Item>
        <Form.Item<FieldType>
          label={t('addFishAudioRefID')}
          name="fish_audio_refid"
          rules={[{ required: false, message: t('FishAudioRefIDMessage') }]}
        >
          <Input placeholder={t('FishAudioRefIDMessage')} />
        </Form.Item>
      </Form>
    </Modal>
  );
};
100
+
101
+ export default FishAudioModal;
web/src/pages/user-setting/setting-model/hooks.ts CHANGED
@@ -244,6 +244,33 @@ export const useSubmityiyan = () => {
244
  };
245
  };
246
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  export const useSubmitBedrock = () => {
248
  const { addLlm, loading } = useAddLlm();
249
  const {
 
244
  };
245
  };
246
 
247
/**
 * State and handlers for the "add Fish Audio model" modal.
 *
 * Exposes the modal's visibility flag with its show/hide callbacks, the
 * loading flag of the add-LLM request, and a submit handler that closes the
 * modal only when `addLlm` resolves to 0.
 */
export const useSubmitFishAudio = () => {
  const { addLlm, loading: FishAudioAddingLoading } = useAddLlm();
  const modalState = useSetModalState();
  const FishAudioAddingVisible = modalState.visible;
  const hideFishAudioAddingModal = modalState.hideModal;
  const showFishAudioAddingModal = modalState.showModal;

  const onFishAudioAddingOk = useCallback(
    async (payload: IAddLlmRequestBody) => {
      const retcode = await addLlm(payload);
      if (retcode !== 0) {
        // Keep the modal open when the request did not succeed.
        return;
      }
      hideFishAudioAddingModal();
    },
    [addLlm, hideFishAudioAddingModal],
  );

  return {
    FishAudioAddingLoading,
    onFishAudioAddingOk,
    FishAudioAddingVisible,
    hideFishAudioAddingModal,
    showFishAudioAddingModal,
  };
};
273
+
274
  export const useSubmitBedrock = () => {
275
  const { addLlm, loading } = useAddLlm();
276
  const {
web/src/pages/user-setting/setting-model/index.tsx CHANGED
@@ -30,10 +30,12 @@ import { isLocalLlmFactory } from '../utils';
30
  import ApiKeyModal from './api-key-modal';
31
  import BedrockModal from './bedrock-modal';
32
  import { IconMap } from './constant';
 
33
  import {
34
  useHandleDeleteLlm,
35
  useSubmitApiKey,
36
  useSubmitBedrock,
 
37
  useSubmitHunyuan,
38
  useSubmitOllama,
39
  useSubmitSpark,
@@ -98,7 +100,8 @@ const ModelCard = ({ item, clickApiKey }: IModelCardProps) => {
98
  item.name === 'VolcEngine' ||
99
  item.name === 'Tencent Hunyuan' ||
100
  item.name === 'XunFei Spark' ||
101
- item.name === 'BaiduYiyan'
 
102
  ? t('addTheModel')
103
  : 'API-Key'}
104
  <SettingOutlined />
@@ -196,6 +199,14 @@ const UserSettingModel = () => {
196
  yiyanAddingLoading,
197
  } = useSubmityiyan();
198
 
 
 
 
 
 
 
 
 
199
  const {
200
  bedrockAddingLoading,
201
  onBedrockAddingOk,
@@ -211,6 +222,7 @@ const UserSettingModel = () => {
211
  'Tencent Hunyuan': showHunyuanAddingModal,
212
  'XunFei Spark': showSparkAddingModal,
213
  BaiduYiyan: showyiyanAddingModal,
 
214
  }),
215
  [
216
  showBedrockAddingModal,
@@ -218,6 +230,7 @@ const UserSettingModel = () => {
218
  showHunyuanAddingModal,
219
  showSparkAddingModal,
220
  showyiyanAddingModal,
 
221
  ],
222
  );
223
 
@@ -350,6 +363,13 @@ const UserSettingModel = () => {
350
  loading={yiyanAddingLoading}
351
  llmFactory={'BaiduYiyan'}
352
  ></YiyanModal>
 
 
 
 
 
 
 
353
  <BedrockModal
354
  visible={bedrockAddingVisible}
355
  hideModal={hideBedrockAddingModal}
 
30
  import ApiKeyModal from './api-key-modal';
31
  import BedrockModal from './bedrock-modal';
32
  import { IconMap } from './constant';
33
+ import FishAudioModal from './fish-audio-modal';
34
  import {
35
  useHandleDeleteLlm,
36
  useSubmitApiKey,
37
  useSubmitBedrock,
38
+ useSubmitFishAudio,
39
  useSubmitHunyuan,
40
  useSubmitOllama,
41
  useSubmitSpark,
 
100
  item.name === 'VolcEngine' ||
101
  item.name === 'Tencent Hunyuan' ||
102
  item.name === 'XunFei Spark' ||
103
+ item.name === 'BaiduYiyan' ||
104
+ item.name === 'Fish Audio'
105
  ? t('addTheModel')
106
  : 'API-Key'}
107
  <SettingOutlined />
 
199
  yiyanAddingLoading,
200
  } = useSubmityiyan();
201
 
202
+ const {
203
+ FishAudioAddingVisible,
204
+ hideFishAudioAddingModal,
205
+ showFishAudioAddingModal,
206
+ onFishAudioAddingOk,
207
+ FishAudioAddingLoading,
208
+ } = useSubmitFishAudio();
209
+
210
  const {
211
  bedrockAddingLoading,
212
  onBedrockAddingOk,
 
222
  'Tencent Hunyuan': showHunyuanAddingModal,
223
  'XunFei Spark': showSparkAddingModal,
224
  BaiduYiyan: showyiyanAddingModal,
225
+ 'Fish Audio': showFishAudioAddingModal,
226
  }),
227
  [
228
  showBedrockAddingModal,
 
230
  showHunyuanAddingModal,
231
  showSparkAddingModal,
232
  showyiyanAddingModal,
233
+ showFishAudioAddingModal,
234
  ],
235
  );
236
 
 
363
  loading={yiyanAddingLoading}
364
  llmFactory={'BaiduYiyan'}
365
  ></YiyanModal>
366
+ <FishAudioModal
367
+ visible={FishAudioAddingVisible}
368
+ hideModal={hideFishAudioAddingModal}
369
+ onOk={onFishAudioAddingOk}
370
+ loading={FishAudioAddingLoading}
371
+ llmFactory={'Fish Audio'}
372
+ ></FishAudioModal>
373
  <BedrockModal
374
  visible={bedrockAddingVisible}
375
  hideModal={hideBedrockAddingModal}
web/src/pages/user-setting/setting-model/spark-modal/index.tsx CHANGED
@@ -82,9 +82,9 @@ const SparkModal = ({
82
  <Form.Item<FieldType>
83
  label={t('addSparkAPIPassword')}
84
  name="spark_api_password"
85
- rules={[{ required: true, message: t('SparkPasswordMessage') }]}
86
  >
87
- <Input placeholder={t('SparkSIDMessage')} />
88
  </Form.Item>
89
  </Form>
90
  </Modal>
 
82
  <Form.Item<FieldType>
83
  label={t('addSparkAPIPassword')}
84
  name="spark_api_password"
85
+ rules={[{ required: true, message: t('SparkAPIPasswordMessage') }]}
86
  >
87
+ <Input placeholder={t('SparkAPIPasswordMessage')} />
88
  </Form.Item>
89
  </Form>
90
  </Modal>
web/src/pages/user-setting/setting-model/system-model-setting-modal/index.tsx CHANGED
@@ -83,6 +83,13 @@ const SystemModelSettingModal = ({
83
  >
84
  <Select options={allOptions[LlmModelType.Rerank]} />
85
  </Form.Item>
 
 
 
 
 
 
 
86
  </Form>
87
  </Modal>
88
  );
 
83
  >
84
  <Select options={allOptions[LlmModelType.Rerank]} />
85
  </Form.Item>
86
+ <Form.Item
87
+ label={t('ttsModel')}
88
+ name="tts_id"
89
+ tooltip={t('ttsModelTip')}
90
+ >
91
+ <Select options={allOptions[LlmModelType.TTS]} />
92
+ </Form.Item>
93
  </Form>
94
  </Modal>
95
  );