Spaces:

retopara
/

ragflow

Build error

App Files Files Community

黄腾

aopstudio Kevin Hu committed on Aug 26, 2024

Commit

e10ed78

1 Parent(s): 2324bcb

add support for TTS model (#2095)

Browse files

### What problem does this PR solve?

add support for TTS model
#1853

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Zhedong Cen <[email protected]>
Co-authored-by: Kevin Hu <[email protected]>

Files changed (23) hide show

api/apps/llm_app.py +14 -1
api/apps/user_app.py +1 -1
api/db/__init__.py +1 -0
api/db/db_models.py +12 -0
api/db/services/llm_service.py +14 -2
conf/llm_factories.json +7 -0
rag/llm/__init__.py +5 -0
rag/llm/tts_model.py +94 -0
requirements.txt +1 -0
requirements_arm.txt +1 -0
web/src/assets/svg/llm/fish-audio.svg +1 -0
web/src/constants/knowledge.ts +1 -0
web/src/hooks/llm-hooks.ts +1 -0
web/src/interfaces/database/knowledge.ts +1 -0
web/src/locales/en.ts +11 -0
web/src/locales/zh-traditional.ts +7 -0
web/src/locales/zh.ts +7 -0
web/src/pages/user-setting/setting-model/constant.ts +1 -0
web/src/pages/user-setting/setting-model/fish-audio-modal/index.tsx +101 -0
web/src/pages/user-setting/setting-model/hooks.ts +27 -0
web/src/pages/user-setting/setting-model/index.tsx +21 -1
web/src/pages/user-setting/setting-model/spark-modal/index.tsx +2 -2
web/src/pages/user-setting/setting-model/system-model-setting-modal/index.tsx +7 -0

api/apps/llm_app.py CHANGED Viewed

@@ -20,7 +20,7 @@ from api.utils.api_utils import server_error_response, get_data_error_result, va
 from api.db import StatusEnum, LLMType
 from api.db.db_models import TenantLLM
 from api.utils.api_utils import get_json_result
-from rag.llm import EmbeddingModel, ChatModel, RerankModel,CvModel
 import requests
 import ast
@@ -142,6 +142,10 @@ def add_llm():
         llm_name = req["llm_name"]
         api_key = '{' + f'"yiyan_ak": "{req.get("yiyan_ak", "")}", ' \
                 f'"yiyan_sk": "{req.get("yiyan_sk", "")}"' + '}'
     else:
         llm_name = req["llm_name"]
         api_key = req.get("api_key","xxxxxxxxxxxxxxx")
@@ -215,6 +219,15 @@ def add_llm():
                 pass
         except Exception as e:
             msg += f"\nFail to access model({llm['llm_name']})." + str(e)
     else:
         # TODO: check other type of models
         pass

 from api.db import StatusEnum, LLMType
 from api.db.db_models import TenantLLM
 from api.utils.api_utils import get_json_result
+from rag.llm import EmbeddingModel, ChatModel, RerankModel, CvModel, TTSModel
 import requests
 import ast
         llm_name = req["llm_name"]
         api_key = '{' + f'"yiyan_ak": "{req.get("yiyan_ak", "")}", ' \
                 f'"yiyan_sk": "{req.get("yiyan_sk", "")}"' + '}'
+    elif factory == "Fish Audio":
+        llm_name = req["llm_name"]
+        api_key = '{' + f'"fish_audio_ak": "{req.get("fish_audio_ak", "")}", ' \
+                f'"fish_audio_refid": "{req.get("fish_audio_refid", "59cb5986671546eaa6ca8ae6f29f6d22")}"' + '}'
     else:
         llm_name = req["llm_name"]
         api_key = req.get("api_key","xxxxxxxxxxxxxxx")
                 pass
         except Exception as e:
             msg += f"\nFail to access model({llm['llm_name']})." + str(e)
+    elif llm["model_type"] == LLMType.TTS:
+        mdl = TTSModel[factory](
+            key=llm["api_key"], model_name=llm["llm_name"], base_url=llm["api_base"]
+        )
+        try:
+            for resp in mdl.transcription("Hello~ Ragflower!"):
+                pass
+        except RuntimeError as e:
+            msg += f"\nFail to access model({llm['llm_name']})." + str(e)
     else:
         # TODO: check other type of models
         pass

api/apps/user_app.py CHANGED Viewed

@@ -410,7 +410,7 @@ def tenant_info():
 @manager.route("/set_tenant_info", methods=["POST"])
 @login_required
-@validate_request("tenant_id", "asr_id", "embd_id", "img2txt_id", "llm_id")
 def set_tenant_info():
     req = request.json
     try:

 @manager.route("/set_tenant_info", methods=["POST"])
 @login_required
+@validate_request("tenant_id", "asr_id", "embd_id", "img2txt_id", "llm_id", "tts_id")
 def set_tenant_info():
     req = request.json
     try:

api/db/__init__.py CHANGED Viewed

@@ -55,6 +55,7 @@ class LLMType(StrEnum):
     SPEECH2TEXT = 'speech2text'
     IMAGE2TEXT = 'image2text'
     RERANK = 'rerank'
 class ChatStyle(StrEnum):

     SPEECH2TEXT = 'speech2text'
     IMAGE2TEXT = 'image2text'
     RERANK = 'rerank'
+    TTS    = 'tts'
 class ChatStyle(StrEnum):

api/db/db_models.py CHANGED Viewed

@@ -449,6 +449,11 @@ class Tenant(DataBaseModel):
         null=False,
         help_text="default rerank model ID",
         index=True)
     parser_ids = CharField(
         max_length=256,
         null=False,
@@ -958,6 +963,13 @@ def migrate_db():
             )
         except Exception as e:
             pass
         try:
             migrate(
                 migrator.add_column('api_4_conversation', 'source',

         null=False,
         help_text="default rerank model ID",
         index=True)
+    tts_id = CharField(
+        max_length=256,
+        null=True,
+        help_text="default tts model ID",
+        index=True)
     parser_ids = CharField(
         max_length=256,
         null=False,
             )
         except Exception as e:
             pass
+        try:
+            migrate(
+                migrator.add_column("tenant","tts_id",
+                    CharField(max_length=256,null=True,help_text="default tts model ID",index=True))
+            )
+        except Exception as e:
+            pass
         try:
             migrate(
                 migrator.add_column('api_4_conversation', 'source',

api/db/services/llm_service.py CHANGED Viewed

@@ -15,7 +15,7 @@
 #
 from api.db.services.user_service import TenantService
 from api.settings import database_logger
-from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel
 from api.db import LLMType
 from api.db.db_models import DB, UserTenant
 from api.db.db_models import LLMFactories, LLM, TenantLLM
@@ -75,6 +75,8 @@ class TenantLLMService(CommonService):
             mdlnm = tenant.llm_id if not llm_name else llm_name
         elif llm_type == LLMType.RERANK:
             mdlnm = tenant.rerank_id if not llm_name else llm_name
         else:
             assert False, "LLM type error"
@@ -127,6 +129,14 @@ class TenantLLMService(CommonService):
                 model_config["api_key"], model_config["llm_name"], lang,
                 base_url=model_config["api_base"]
             )
     @classmethod
     @DB.connection_context()
@@ -144,7 +154,9 @@ class TenantLLMService(CommonService):
         elif llm_type == LLMType.CHAT.value:
             mdlnm = tenant.llm_id if not llm_name else llm_name
         elif llm_type == LLMType.RERANK:
-            mdlnm = tenant.llm_id if not llm_name else llm_name
         else:
             assert False, "LLM type error"

 #
 from api.db.services.user_service import TenantService
 from api.settings import database_logger
+from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel, TTSModel
 from api.db import LLMType
 from api.db.db_models import DB, UserTenant
 from api.db.db_models import LLMFactories, LLM, TenantLLM
             mdlnm = tenant.llm_id if not llm_name else llm_name
         elif llm_type == LLMType.RERANK:
             mdlnm = tenant.rerank_id if not llm_name else llm_name
+        elif llm_type == LLMType.TTS:
+            mdlnm = tenant.tts_id if not llm_name else llm_name
         else:
             assert False, "LLM type error"
                 model_config["api_key"], model_config["llm_name"], lang,
                 base_url=model_config["api_base"]
             )
+        if llm_type == LLMType.TTS:
+            if model_config["llm_factory"] not in TTSModel:
+                return
+            return TTSModel[model_config["llm_factory"]](
+                model_config["api_key"],
+                model_config["llm_name"],
+                base_url=model_config["api_base"],
+            )
     @classmethod
     @DB.connection_context()
         elif llm_type == LLMType.CHAT.value:
             mdlnm = tenant.llm_id if not llm_name else llm_name
         elif llm_type == LLMType.RERANK:
+            mdlnm = tenant.rerank_id if not llm_name else llm_name
+        elif llm_type == LLMType.TTS:
+            mdlnm = tenant.tts_id if not llm_name else llm_name
         else:
             assert False, "LLM type error"

conf/llm_factories.json CHANGED Viewed

@@ -3214,6 +3214,13 @@
             "tags": "LLM",
             "status": "1",
             "llm": []
         }
     ]
 }

             "tags": "LLM",
             "status": "1",
             "llm": []
+        },
+        {
+            "name": "Fish Audio",
+            "logo": "",
+            "tags": "TTS",
+            "status": "1",
+            "llm": []
         }
     ]
 }

rag/llm/__init__.py CHANGED Viewed

@@ -18,6 +18,7 @@ from .chat_model import *
 from .cv_model import *
 from .rerank_model import *
 from .sequence2txt_model import *
 EmbeddingModel = {
     "Ollama": OllamaEmbed,
@@ -129,3 +130,7 @@ Seq2txtModel = {
     "Azure-OpenAI": AzureSeq2txt,
     "Xinference": XinferenceSeq2txt
 }

 from .cv_model import *
 from .rerank_model import *
 from .sequence2txt_model import *
+from .tts_model import *
 EmbeddingModel = {
     "Ollama": OllamaEmbed,
     "Azure-OpenAI": AzureSeq2txt,
     "Xinference": XinferenceSeq2txt
 }
+# Registry of text-to-speech backends, keyed by the LLM factory name.
+# Callers index it as TTSModel[factory] to obtain the class to instantiate.
+TTSModel = {
+    "Fish Audio": FishAudioTTS
+}

rag/llm/tts_model.py ADDED Viewed

	@@ -0,0 +1,94 @@

+#
+#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+from typing import Annotated, Literal
+from abc import ABC
+import httpx
+import ormsgpack
+from pydantic import BaseModel, conint
+from rag.utils import num_tokens_from_string
+import json
+class ServeReferenceAudio(BaseModel):
+    """One reference sample that can be attached to a TTS request."""
+    # Raw bytes of the reference audio clip.
+    audio: bytes
+    # Text paired with the clip.
+    # NOTE(review): presumably the transcript of `audio` -- confirm against the Fish Audio API.
+    text: str
+class ServeTTSRequest(BaseModel):
+    """Request payload that FishAudioTTS serializes with msgpack and POSTs to the TTS endpoint."""
+    # Text to synthesize.
+    text: str
+    # Strict integer constrained to the range 100..300; defaults to 200.
+    chunk_length: Annotated[int, conint(ge=100, le=300, strict=True)] = 200
+    # Output audio format.
+    format: Literal["wav", "pcm", "mp3"] = "mp3"
+    # Allowed bitrates for mp3 output.
+    mp3_bitrate: Literal[64, 128, 192] = 128
+    # Reference audio samples for in-context learning.
+    references: list[ServeReferenceAudio] = []
+    # Reference id of a hosted voice model.
+    # For example, to use https://fish.audio/m/7f92f8afb8ec43bf81429cc1c9199cb1/
+    # pass just 7f92f8afb8ec43bf81429cc1c9199cb1.
+    reference_id: str | None = None
+    # Normalize text for en & zh; this increases stability for numbers.
+    normalize: bool = True
+    # "balanced" mode reduces latency to about 300ms but may decrease stability.
+    latency: Literal["normal", "balanced"] = "normal"
+class Base(ABC):
+    """Common interface for TTS backends; both methods are no-op placeholders here."""
+    def __init__(self, key, model_name, base_url):
+        # Subclasses are expected to store whatever they need from these arguments.
+        pass
+    def transcription(self, audio):
+        # NOTE(review): the parameter is named `audio`, but the FishAudioTTS override
+        # receives the text to synthesize -- the name looks inherited from a
+        # speech-to-text interface; confirm before relying on it.
+        pass
+class FishAudioTTS(Base):
+    def __init__(self, key, model_name, base_url="https://api.fish.audio/v1/tts"):
+        if not base_url:
+            base_url = "https://api.fish.audio/v1/tts"
+        key = json.loads(key)
+        self.headers = {
+            "api-key": key.get("fish_audio_ak"),
+            "content-type": "application/msgpack",
+        }
+        self.ref_id = key.get("fish_audio_refid")
+        self.base_url = base_url
+    def transcription(self, text):
+        from http import HTTPStatus
+        request = request = ServeTTSRequest(text=text, reference_id=self.ref_id)
+        with httpx.Client() as client:
+            try:
+                with client.stream(
+                    method="POST",
+                    url=self.base_url,
+                    content=ormsgpack.packb(
+                        request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC
+                    ),
+                    headers=self.headers,
+                    timeout=None,
+                ) as response:
+                    if response.status_code == HTTPStatus.OK:
+                        for chunk in response.iter_bytes():
+                            yield chunk
+                    else:
+                        response.raise_for_status()
+                yield num_tokens_from_string(text)
+            except httpx.HTTPStatusError as e:
+                raise RuntimeError(f"**ERROR**: {e}")

requirements.txt CHANGED Viewed

@@ -47,6 +47,7 @@ openai==1.12.0
 opencv_python==4.9.0.80
 opencv_python_headless==4.9.0.80
 openpyxl==3.1.2
 pandas==2.2.2
 pdfplumber==0.10.4
 peewee==3.17.1

 opencv_python==4.9.0.80
 opencv_python_headless==4.9.0.80
 openpyxl==3.1.2
+ormsgpack==1.5.0
 pandas==2.2.2
 pdfplumber==0.10.4
 peewee==3.17.1

requirements_arm.txt CHANGED Viewed

@@ -74,6 +74,7 @@ ollama==0.1.9
 openai==1.12.0
 opencv-python==4.9.0.80
 openpyxl==3.1.2
 packaging==23.2
 pandas==2.2.1
 pdfminer.six==20221105

 openai==1.12.0
 opencv-python==4.9.0.80
 openpyxl==3.1.2
+ormsgpack==1.5.0
 packaging==23.2
 pandas==2.2.1
 pdfminer.six==20221105

web/src/assets/svg/llm/fish-audio.svg ADDED Viewed

web/src/constants/knowledge.ts CHANGED Viewed

@@ -48,6 +48,7 @@ export enum LlmModelType {
   Image2text = 'image2text',
   Speech2text = 'speech2text',
   Rerank = 'rerank',
 }
 export enum KnowledgeSearchParams {

   Image2text = 'image2text',
   Speech2text = 'speech2text',
   Rerank = 'rerank',
+  TTS = 'tts',
 }
 export enum KnowledgeSearchParams {

web/src/hooks/llm-hooks.ts CHANGED Viewed

@@ -87,6 +87,7 @@ export const useSelectLlmOptionsByModelType = () => {
       LlmModelType.Speech2text,
     ),
     [LlmModelType.Rerank]: groupOptionsByModelType(LlmModelType.Rerank),
   };
 };

       LlmModelType.Speech2text,
     ),
     [LlmModelType.Rerank]: groupOptionsByModelType(LlmModelType.Rerank),
+    [LlmModelType.TTS]: groupOptionsByModelType(LlmModelType.TTS),
   };
 };

web/src/interfaces/database/knowledge.ts CHANGED Viewed

@@ -71,6 +71,7 @@ export interface ITenantInfo {
   tenant_id: string;
   chat_id: string;
   speech2text_id: string;
 }
 export interface IChunk {

   tenant_id: string;
   chat_id: string;
   speech2text_id: string;
+  tts_id: string;
 }
 export interface IChunk {

web/src/locales/en.ts CHANGED Viewed

@@ -490,6 +490,9 @@ The above is the content you need to summarize.`,
         'The default ASR model all the newly created knowledgebase will use. Use this model to translate voices to corresponding text.',
       rerankModel: 'Rerank Model',
       rerankModelTip: `The default rerank model is used to rerank chunks retrieved by users' questions.`,
       workspace: 'Workspace',
       upgrade: 'Upgrade',
       addLlmTitle: 'Add LLM',
@@ -502,6 +505,7 @@ The above is the content you need to summarize.`,
       baseUrlNameMessage: 'Please input your base url!',
       vision: 'Does it support Vision?',
       ollamaLink: 'How to integrate {{name}}',
       volcModelNameMessage: 'Please input your model name!',
       addEndpointID: 'EndpointID of the model',
       endpointIDMessage: 'Please input your EndpointID of the model',
@@ -533,6 +537,13 @@ The above is the content you need to summarize.`,
       yiyanAKMessage: 'Please input your API KEY',
       addyiyanSK: 'yiyan Secret KEY',
       yiyanSKMessage: 'Please input your Secret KEY',
     },
     message: {
       registered: 'Registered!',

         'The default ASR model all the newly created knowledgebase will use. Use this model to translate voices to corresponding text.',
       rerankModel: 'Rerank Model',
       rerankModelTip: `The default rerank model is used to rerank chunks retrieved by users' questions.`,
+      ttsModel: 'TTS Model',
+      ttsModelTip:
+        'The default TTS model will be used to generate speech during conversations upon request.',
       workspace: 'Workspace',
       upgrade: 'Upgrade',
       addLlmTitle: 'Add LLM',
       baseUrlNameMessage: 'Please input your base url!',
       vision: 'Does it support Vision?',
       ollamaLink: 'How to integrate {{name}}',
+      FishAudioLink: 'How to use FishAudio',
       volcModelNameMessage: 'Please input your model name!',
       addEndpointID: 'EndpointID of the model',
       endpointIDMessage: 'Please input your EndpointID of the model',
       yiyanAKMessage: 'Please input your API KEY',
       addyiyanSK: 'yiyan Secret KEY',
       yiyanSKMessage: 'Please input your Secret KEY',
+      FishAudioModelNameMessage:
+        'Please give your speech synthesis model a name',
+      addFishAudioAK: 'Fish Audio API KEY',
+      addFishAudioAKMessage: 'Please input your API KEY',
+      addFishAudioRefID: 'FishAudio Refrence ID',
+      addFishAudioRefIDMessage:
+        'Please input the Reference ID (leave blank to use the default model).',
     },
     message: {
       registered: 'Registered!',

web/src/locales/zh-traditional.ts CHANGED Viewed

@@ -443,6 +443,8 @@ export default {
       systemModelSettings: '系統模型設置',
       chatModel: '聊天模型',
       chatModelTip: '所有新創建的知識庫都會使用默認的聊天LLM。',
       embeddingModel: '嵌入模型',
       embeddingModelTip: '所有新創建的知識庫都將使用的默認嵌入模型。',
       img2txtModel: 'img2Txt模型',
@@ -465,6 +467,7 @@ export default {
       modelTypeMessage: '請輸入模型類型！',
       baseUrlNameMessage: '請輸入基礎 Url！',
       ollamaLink: '如何集成 {{name}}',
       volcModelNameMessage: '請輸入模型名稱！',
       addEndpointID: '模型 EndpointID',
       endpointIDMessage: '請輸入模型對應的EndpointID',
@@ -496,6 +499,10 @@ export default {
       yiyanAKMessage: '請輸入 API KEY',
       addyiyanSK: '一言 Secret KEY',
       yiyanSKMessage: '請輸入 Secret KEY',
     },
     message: {
       registered: '註冊成功',

       systemModelSettings: '系統模型設置',
       chatModel: '聊天模型',
       chatModelTip: '所有新創建的知識庫都會使用默認的聊天LLM。',
+      ttsModel: '語音合成模型',
+      ttsModelTip: '默認的tts模型會被用於在對話過程中請求語音生成時使用。',
       embeddingModel: '嵌入模型',
       embeddingModelTip: '所有新創建的知識庫都將使用的默認嵌入模型。',
       img2txtModel: 'img2Txt模型',
       modelTypeMessage: '請輸入模型類型！',
       baseUrlNameMessage: '請輸入基礎 Url！',
       ollamaLink: '如何集成 {{name}}',
+      FishAudioLink: '如何使用Fish Audio',
       volcModelNameMessage: '請輸入模型名稱！',
       addEndpointID: '模型 EndpointID',
       endpointIDMessage: '請輸入模型對應的EndpointID',
       yiyanAKMessage: '請輸入 API KEY',
       addyiyanSK: '一言 Secret KEY',
       yiyanSKMessage: '請輸入 Secret KEY',
+      addFishAudioAK: 'Fish Audio API KEY',
+      addFishAudioAKMessage: '請輸入 API KEY',
+      addFishAudioRefID: 'FishAudio Refrence ID',
+      addFishAudioRefIDMessage: '請輸入引用模型的ID（留空表示使用默認模型）',
     },
     message: {
       registered: '註冊成功',

web/src/locales/zh.ts CHANGED Viewed

@@ -460,6 +460,8 @@ export default {
       systemModelSettings: '系统模型设置',
       chatModel: '聊天模型',
       chatModelTip: '所有新创建的知识库都会使用默认的聊天LLM。',
       embeddingModel: '嵌入模型',
       embeddingModelTip: '所有新创建的知识库都将使用的默认嵌入模型。',
       img2txtModel: 'Img2txt模型',
@@ -482,6 +484,7 @@ export default {
       modelTypeMessage: '请输入模型类型！',
       baseUrlNameMessage: '请输入基础 Url！',
       ollamaLink: '如何集成 {{name}}',
       volcModelNameMessage: '请输入模型名称！',
       addEndpointID: '模型 EndpointID',
       endpointIDMessage: '请输入模型对应的EndpointID',
@@ -513,6 +516,10 @@ export default {
       yiyanAKMessage: '请输入 API KEY',
       addyiyanSK: '一言 Secret KEY',
       yiyanSKMessage: '请输入 Secret KEY',
     },
     message: {
       registered: '注册成功',

       systemModelSettings: '系统模型设置',
       chatModel: '聊天模型',
       chatModelTip: '所有新创建的知识库都会使用默认的聊天LLM。',
+      ttsModel: 'TTS模型',
+      ttsModelTip: '默认的tts模型会被用于在对话过程中请求语音生成时使用',
       embeddingModel: '嵌入模型',
       embeddingModelTip: '所有新创建的知识库都将使用的默认嵌入模型。',
       img2txtModel: 'Img2txt模型',
       modelTypeMessage: '请输入模型类型！',
       baseUrlNameMessage: '请输入基础 Url！',
       ollamaLink: '如何集成 {{name}}',
+      FishAudioLink: '如何使用Fish Audio',
       volcModelNameMessage: '请输入模型名称！',
       addEndpointID: '模型 EndpointID',
       endpointIDMessage: '请输入模型对应的EndpointID',
       yiyanAKMessage: '请输入 API KEY',
       addyiyanSK: '一言 Secret KEY',
       yiyanSKMessage: '请输入 Secret KEY',
+      addFishAudioAK: 'Fish Audio API KEY',
+      FishAudioAKMessage: '请输入 API KEY',
+      addFishAudioRefID: 'FishAudio Refrence ID',
+      FishAudioRefIDMessage: '请输入引用模型的ID（留空表示使用默认模型）',
     },
     message: {
       registered: '注册成功',

web/src/pages/user-setting/setting-model/constant.ts CHANGED Viewed

@@ -35,6 +35,7 @@ export const IconMap = {
   'Tencent Hunyuan': 'hunyuan',
   'XunFei Spark': 'spark',
   BaiduYiyan: 'yiyan',
 };
 export const BedrockRegionList = [

   'Tencent Hunyuan': 'hunyuan',
   'XunFei Spark': 'spark',
   BaiduYiyan: 'yiyan',
+  'Fish Audio': 'fish-audio',
 };
 export const BedrockRegionList = [

web/src/pages/user-setting/setting-model/fish-audio-modal/index.tsx ADDED Viewed

	@@ -0,0 +1,101 @@

+import { useTranslate } from '@/hooks/common-hooks';
+import { IModalProps } from '@/interfaces/common';
+import { IAddLlmRequestBody } from '@/interfaces/request/llm';
+import { Flex, Form, Input, Modal, Select, Space } from 'antd';
+import omit from 'lodash/omit';
+type FieldType = IAddLlmRequestBody & {
+  fish_audio_ak: string;
+  fish_audio_refid: string;
+};
+const { Option } = Select;
+/**
+ * Modal dialog for registering a Fish Audio TTS model.
+ *
+ * Collects the model type, a display name, the API key and an optional
+ * reference id, then passes them to `onOk` together with `llmFactory`.
+ */
+const FishAudioModal = ({
+  visible,
+  hideModal,
+  onOk,
+  loading,
+  llmFactory,
+}: IModalProps<IAddLlmRequestBody> & { llmFactory: string }) => {
+  const [form] = Form.useForm<FieldType>();
+  const { t } = useTranslate('setting');
+  // Validate the form and submit its values plus the factory name.
+  // The payload is deliberately not logged: it contains the API key.
+  const handleOk = async () => {
+    const values = await form.validateFields();
+    const modelType = values.model_type;
+    const data = {
+      ...omit(values),
+      model_type: modelType,
+      llm_factory: llmFactory,
+    };
+    onOk?.(data);
+  };
+  // NOTE(review): the 'FishAudioAKMessage' / 'FishAudioRefIDMessage' keys used
+  // below exist in zh.ts only; en.ts and zh-traditional.ts define
+  // 'addFishAudioAKMessage' / 'addFishAudioRefIDMessage'. Align the locale files.
+  return (
+    <Modal
+      title={t('addLlmTitle', { name: llmFactory })}
+      open={visible}
+      onOk={handleOk}
+      onCancel={hideModal}
+      okButtonProps={{ loading }}
+      footer={(originNode: React.ReactNode) => {
+        return (
+          <Flex justify={'space-between'}>
+            <a href={`https://fish.audio`} target="_blank" rel="noreferrer">
+              {t('FishAudioLink')}
+            </a>
+            <Space>{originNode}</Space>
+          </Flex>
+        );
+      }}
+      confirmLoading={loading}
+    >
+      <Form
+        name="basic"
+        style={{ maxWidth: 600 }}
+        autoComplete="off"
+        layout={'vertical'}
+        form={form}
+      >
+        <Form.Item<FieldType>
+          label={t('modelType')}
+          name="model_type"
+          initialValue={'tts'}
+          rules={[{ required: true, message: t('modelTypeMessage') }]}
+        >
+          <Select placeholder={t('modelTypeMessage')}>
+            <Option value="tts">tts</Option>
+          </Select>
+        </Form.Item>
+        <Form.Item<FieldType>
+          label={t('modelName')}
+          name="llm_name"
+          rules={[{ required: true, message: t('FishAudioModelNameMessage') }]}
+        >
+          <Input placeholder={t('FishAudioModelNameMessage')} />
+        </Form.Item>
+        {/* Field names must match FieldType and the keys the backend reads
+            (fish_audio_ak / fish_audio_refid). */}
+        <Form.Item<FieldType>
+          label={t('addFishAudioAK')}
+          name="fish_audio_ak"
+          rules={[{ required: true, message: t('FishAudioAKMessage') }]}
+        >
+          <Input placeholder={t('FishAudioAKMessage')} />
+        </Form.Item>
+        <Form.Item<FieldType>
+          label={t('addFishAudioRefID')}
+          name="fish_audio_refid"
+          rules={[{ required: false, message: t('FishAudioRefIDMessage') }]}
+        >
+          <Input placeholder={t('FishAudioRefIDMessage')} />
+        </Form.Item>
+      </Form>
+    </Modal>
+  );
+};
+export default FishAudioModal;

web/src/pages/user-setting/setting-model/hooks.ts CHANGED Viewed

@@ -244,6 +244,33 @@ export const useSubmityiyan = () => {
   };
 };
 export const useSubmitBedrock = () => {
   const { addLlm, loading } = useAddLlm();
   const {

   };
 };
+/**
+ * State and submit handler for the "add Fish Audio model" modal.
+ *
+ * Exposes the modal's visibility controls, the loading flag of the add-LLM
+ * request, and an OK handler that closes the modal when `addLlm` resolves to 0.
+ */
+export const useSubmitFishAudio = () => {
+  const { addLlm, loading: FishAudioAddingLoading } = useAddLlm();
+  const modalState = useSetModalState();
+  const hideFishAudioAddingModal = modalState.hideModal;
+  const onFishAudioAddingOk = useCallback(
+    async (payload: IAddLlmRequestBody) => {
+      // Keep the modal open unless the request reports 0.
+      if ((await addLlm(payload)) === 0) {
+        hideFishAudioAddingModal();
+      }
+    },
+    [hideFishAudioAddingModal, addLlm],
+  );
+  return {
+    FishAudioAddingLoading,
+    onFishAudioAddingOk,
+    FishAudioAddingVisible: modalState.visible,
+    hideFishAudioAddingModal,
+    showFishAudioAddingModal: modalState.showModal,
+  };
+};
 export const useSubmitBedrock = () => {
   const { addLlm, loading } = useAddLlm();
   const {

web/src/pages/user-setting/setting-model/index.tsx CHANGED Viewed

@@ -30,10 +30,12 @@ import { isLocalLlmFactory } from '../utils';
 import ApiKeyModal from './api-key-modal';
 import BedrockModal from './bedrock-modal';
 import { IconMap } from './constant';
 import {
   useHandleDeleteLlm,
   useSubmitApiKey,
   useSubmitBedrock,
   useSubmitHunyuan,
   useSubmitOllama,
   useSubmitSpark,
@@ -98,7 +100,8 @@ const ModelCard = ({ item, clickApiKey }: IModelCardProps) => {
                 item.name === 'VolcEngine' ||
                 item.name === 'Tencent Hunyuan' ||
                 item.name === 'XunFei Spark' ||
-                item.name === 'BaiduYiyan'
                   ? t('addTheModel')
                   : 'API-Key'}
                 <SettingOutlined />
@@ -196,6 +199,14 @@ const UserSettingModel = () => {
     yiyanAddingLoading,
   } = useSubmityiyan();
   const {
     bedrockAddingLoading,
     onBedrockAddingOk,
@@ -211,6 +222,7 @@ const UserSettingModel = () => {
       'Tencent Hunyuan': showHunyuanAddingModal,
       'XunFei Spark': showSparkAddingModal,
       BaiduYiyan: showyiyanAddingModal,
     }),
     [
       showBedrockAddingModal,
@@ -218,6 +230,7 @@ const UserSettingModel = () => {
       showHunyuanAddingModal,
       showSparkAddingModal,
       showyiyanAddingModal,
     ],
   );
@@ -350,6 +363,13 @@ const UserSettingModel = () => {
         loading={yiyanAddingLoading}
         llmFactory={'BaiduYiyan'}
       ></YiyanModal>
       <BedrockModal
         visible={bedrockAddingVisible}
         hideModal={hideBedrockAddingModal}

 import ApiKeyModal from './api-key-modal';
 import BedrockModal from './bedrock-modal';
 import { IconMap } from './constant';
+import FishAudioModal from './fish-audio-modal';
 import {
   useHandleDeleteLlm,
   useSubmitApiKey,
   useSubmitBedrock,
+  useSubmitFishAudio,
   useSubmitHunyuan,
   useSubmitOllama,
   useSubmitSpark,
                 item.name === 'VolcEngine' ||
                 item.name === 'Tencent Hunyuan' ||
                 item.name === 'XunFei Spark' ||
+                item.name === 'BaiduYiyan' ||
+                item.name === 'Fish Audio'
                   ? t('addTheModel')
                   : 'API-Key'}
                 <SettingOutlined />
     yiyanAddingLoading,
   } = useSubmityiyan();
+  const {
+    FishAudioAddingVisible,
+    hideFishAudioAddingModal,
+    showFishAudioAddingModal,
+    onFishAudioAddingOk,
+    FishAudioAddingLoading,
+  } = useSubmitFishAudio();
   const {
     bedrockAddingLoading,
     onBedrockAddingOk,
       'Tencent Hunyuan': showHunyuanAddingModal,
       'XunFei Spark': showSparkAddingModal,
       BaiduYiyan: showyiyanAddingModal,
+      'Fish Audio': showFishAudioAddingModal,
     }),
     [
       showBedrockAddingModal,
       showHunyuanAddingModal,
       showSparkAddingModal,
       showyiyanAddingModal,
+      showFishAudioAddingModal,
     ],
   );
         loading={yiyanAddingLoading}
         llmFactory={'BaiduYiyan'}
       ></YiyanModal>
+      <FishAudioModal
+        visible={FishAudioAddingVisible}
+        hideModal={hideFishAudioAddingModal}
+        onOk={onFishAudioAddingOk}
+        loading={FishAudioAddingLoading}
+        llmFactory={'Fish Audio'}
+      ></FishAudioModal>
       <BedrockModal
         visible={bedrockAddingVisible}
         hideModal={hideBedrockAddingModal}

web/src/pages/user-setting/setting-model/spark-modal/index.tsx CHANGED Viewed

@@ -82,9 +82,9 @@ const SparkModal = ({
         <Form.Item<FieldType>
           label={t('addSparkAPIPassword')}
           name="spark_api_password"
-          rules={[{ required: true, message: t('SparkPasswordMessage') }]}
         >
-          <Input placeholder={t('SparkSIDMessage')} />
         </Form.Item>
       </Form>
     </Modal>

         <Form.Item<FieldType>
           label={t('addSparkAPIPassword')}
           name="spark_api_password"
+          rules={[{ required: true, message: t('SparkAPIPasswordMessage') }]}
         >
+          <Input placeholder={t('SparkAPIPasswordMessage')} />
         </Form.Item>
       </Form>
     </Modal>

web/src/pages/user-setting/setting-model/system-model-setting-modal/index.tsx CHANGED Viewed

@@ -83,6 +83,13 @@ const SystemModelSettingModal = ({
         >
           <Select options={allOptions[LlmModelType.Rerank]} />
         </Form.Item>
       </Form>
     </Modal>
   );

         >
           <Select options={allOptions[LlmModelType.Rerank]} />
         </Form.Item>
+        <Form.Item
+          label={t('ttsModel')}
+          name="tts_id"
+          tooltip={t('ttsModelTip')}
+        >
+          <Select options={allOptions[LlmModelType.TTS]} />
+        </Form.Item>
       </Form>
     </Modal>
   );