Upload 1285 files
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +6 -0
- api/core/model_runtime/README.md +70 -0
- api/core/model_runtime/README_CN.md +89 -0
- api/core/model_runtime/__init__.py +0 -0
- api/core/model_runtime/model_providers/__base/__init__.py +0 -0
- api/core/model_runtime/model_providers/__base/ai_model.py +334 -0
- api/core/model_runtime/model_providers/__base/audio.mp3 +3 -0
- api/core/model_runtime/model_providers/__base/large_language_model.py +904 -0
- api/core/model_runtime/model_providers/__base/model_provider.py +121 -0
- api/core/model_runtime/model_providers/__base/moderation_model.py +49 -0
- api/core/model_runtime/model_providers/__base/rerank_model.py +69 -0
- api/core/model_runtime/model_providers/__base/speech2text_model.py +59 -0
- api/core/model_runtime/model_providers/__base/text2img_model.py +54 -0
- api/core/model_runtime/model_providers/__base/text_embedding_model.py +111 -0
- api/core/model_runtime/model_providers/__base/tokenizers/gpt2/merges.txt +0 -0
- api/core/model_runtime/model_providers/__base/tokenizers/gpt2/special_tokens_map.json +23 -0
- api/core/model_runtime/model_providers/__base/tokenizers/gpt2/tokenizer_config.json +33 -0
- api/core/model_runtime/model_providers/__base/tokenizers/gpt2/vocab.json +0 -0
- api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py +51 -0
- api/core/model_runtime/model_providers/__base/tts_model.py +179 -0
- api/core/model_runtime/model_providers/__init__.py +3 -0
- api/core/model_runtime/model_providers/_position.yaml +43 -0
- api/core/model_runtime/model_providers/anthropic/__init__.py +0 -0
- api/core/model_runtime/model_providers/anthropic/_assets/icon_l_en.svg +78 -0
- api/core/model_runtime/model_providers/anthropic/_assets/icon_s_en.svg +4 -0
- api/core/model_runtime/model_providers/anthropic/anthropic.py +28 -0
- api/core/model_runtime/model_providers/anthropic/anthropic.yaml +39 -0
- api/core/model_runtime/model_providers/anthropic/llm/__init__.py +0 -0
- api/core/model_runtime/model_providers/anthropic/llm/_position.yaml +10 -0
- api/core/model_runtime/model_providers/anthropic/llm/claude-2.1.yaml +36 -0
- api/core/model_runtime/model_providers/anthropic/llm/claude-2.yaml +37 -0
- api/core/model_runtime/model_providers/anthropic/llm/claude-3-5-haiku-20241022.yaml +38 -0
- api/core/model_runtime/model_providers/anthropic/llm/claude-3-5-sonnet-20240620.yaml +40 -0
- api/core/model_runtime/model_providers/anthropic/llm/claude-3-5-sonnet-20241022.yaml +40 -0
- api/core/model_runtime/model_providers/anthropic/llm/claude-3-haiku-20240307.yaml +39 -0
- api/core/model_runtime/model_providers/anthropic/llm/claude-3-opus-20240229.yaml +39 -0
- api/core/model_runtime/model_providers/anthropic/llm/claude-3-sonnet-20240229.yaml +39 -0
- api/core/model_runtime/model_providers/anthropic/llm/claude-instant-1.2.yaml +36 -0
- api/core/model_runtime/model_providers/anthropic/llm/claude-instant-1.yaml +36 -0
- api/core/model_runtime/model_providers/anthropic/llm/llm.py +654 -0
- api/core/model_runtime/model_providers/azure_ai_studio/__init__.py +0 -0
- api/core/model_runtime/model_providers/azure_ai_studio/_assets/icon_l_en.png +0 -0
- api/core/model_runtime/model_providers/azure_ai_studio/_assets/icon_s_en.png +0 -0
- api/core/model_runtime/model_providers/azure_ai_studio/azure_ai_studio.py +17 -0
- api/core/model_runtime/model_providers/azure_ai_studio/azure_ai_studio.yaml +99 -0
- api/core/model_runtime/model_providers/azure_ai_studio/llm/__init__.py +0 -0
- api/core/model_runtime/model_providers/azure_ai_studio/llm/llm.py +345 -0
- api/core/model_runtime/model_providers/azure_ai_studio/rerank/__init__.py +0 -0
- api/core/model_runtime/model_providers/azure_ai_studio/rerank/rerank.py +164 -0
- api/core/model_runtime/model_providers/azure_openai/__init__.py +0 -0
.gitattributes
CHANGED
@@ -26,3 +26,9 @@ api/core/model_runtime/docs/zh_Hans/images/index/image-20231210144229650.png fil
|
|
26 |
api/core/model_runtime/docs/zh_Hans/images/index/image-20231210144814617.png filter=lfs diff=lfs merge=lfs -text
|
27 |
api/core/model_runtime/docs/zh_Hans/images/index/image-20231210165243632.png filter=lfs diff=lfs merge=lfs -text
|
28 |
api/core/model_runtime/docs/zh_Hans/images/index/image.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
api/core/model_runtime/docs/zh_Hans/images/index/image-20231210144814617.png filter=lfs diff=lfs merge=lfs -text
|
27 |
api/core/model_runtime/docs/zh_Hans/images/index/image-20231210165243632.png filter=lfs diff=lfs merge=lfs -text
|
28 |
api/core/model_runtime/docs/zh_Hans/images/index/image.png filter=lfs diff=lfs merge=lfs -text
|
29 |
+
api/core/model_runtime/model_providers/__base/audio.mp3 filter=lfs diff=lfs merge=lfs -text
|
30 |
+
api/core/model_runtime/model_providers/gpustack/_assets/icon_l_en.png filter=lfs diff=lfs merge=lfs -text
|
31 |
+
api/core/model_runtime/model_providers/leptonai/_assets/icon_l_en.png filter=lfs diff=lfs merge=lfs -text
|
32 |
+
api/core/model_runtime/model_providers/mixedbread/_assets/icon_l_en.png filter=lfs diff=lfs merge=lfs -text
|
33 |
+
api/core/model_runtime/model_providers/nvidia_nim/_assets/icon_l_en.png filter=lfs diff=lfs merge=lfs -text
|
34 |
+
api/core/model_runtime/model_providers/nvidia/_assets/icon_l_en.png filter=lfs diff=lfs merge=lfs -text
|
api/core/model_runtime/README.md
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Model Runtime
|
2 |
+
|
3 |
+
This module provides the interface for invoking and authenticating various models, and offers Dify a unified information and credentials form rule for model providers.
|
4 |
+
|
5 |
+
- On one hand, it decouples models from upstream and downstream processes, facilitating horizontal expansion for developers,
|
6 |
+
- On the other hand, it allows for direct display of providers and models in the frontend interface by simply defining them in the backend, eliminating the need to modify frontend logic.
|
7 |
+
|
8 |
+
## Features
|
9 |
+
|
10 |
+
- Supports capability invocation for 5 types of models
|
11 |
+
|
12 |
+
- `LLM` - LLM text completion, dialogue, pre-computed tokens capability
|
13 |
+
- `Text Embedding Model` - Text Embedding, pre-computed tokens capability
|
14 |
+
- `Rerank Model` - Segment Rerank capability
|
15 |
+
- `Speech-to-text Model` - Speech to text capability
|
16 |
+
- `Text-to-speech Model` - Text to speech capability
|
17 |
+
- `Moderation` - Moderation capability
|
18 |
+
|
19 |
+
- Model provider display
|
20 |
+
|
21 |
+

|
22 |
+
|
23 |
+
Displays a list of all supported providers, including provider names, icons, supported model types list, predefined model list, configuration method, and credentials form rules, etc. For detailed rule design, see: [Schema](./docs/en_US/schema.md).
|
24 |
+
|
25 |
+
- Selectable model list display
|
26 |
+
|
27 |
+

|
28 |
+
|
29 |
+
After configuring provider/model credentials, the dropdown (application orchestration interface/default model) allows viewing of the available LLM list. Greyed out items represent predefined model lists from providers without configured credentials, facilitating user review of supported models.
|
30 |
+
|
31 |
+
In addition, this list also returns configurable parameter information and rules for LLM, as shown below:
|
32 |
+
|
33 |
+

|
34 |
+
|
35 |
+
These parameters are all defined in the backend, allowing different settings for various parameters supported by different models, as detailed in: [Schema](./docs/en_US/schema.md#ParameterRule).
|
36 |
+
|
37 |
+
- Provider/model credential authentication
|
38 |
+
|
39 |
+

|
40 |
+
|
41 |
+

|
42 |
+
|
43 |
+
The provider list returns configuration information for the credentials form, which can be authenticated through Runtime's interface. The first image above is a provider credential DEMO, and the second is a model credential DEMO.
|
44 |
+
|
45 |
+
## Structure
|
46 |
+
|
47 |
+

|
48 |
+
|
49 |
+
Model Runtime is divided into three layers:
|
50 |
+
|
51 |
+
- The outermost layer is the factory method
|
52 |
+
|
53 |
+
It provides methods for obtaining all providers, all model lists, getting provider instances, and authenticating provider/model credentials.
|
54 |
+
|
55 |
+
- The second layer is the provider layer
|
56 |
+
|
57 |
+
It provides the current provider's model list, model instance obtaining, provider credential authentication, and provider configuration rule information, **allowing horizontal expansion** to support different providers.
|
58 |
+
|
59 |
+
- The bottom layer is the model layer
|
60 |
+
|
61 |
+
It offers direct invocation of various model types, predefined model configuration information, getting predefined/remote model lists, model credential authentication methods. Different models provide additional special methods, like LLM's pre-computed tokens method, cost information obtaining method, etc., **allowing horizontal expansion** for different models under the same provider (within supported model types).
|
62 |
+
|
63 |
+
|
64 |
+
|
65 |
+
## Next Steps
|
66 |
+
|
67 |
+
- Add new provider configuration: [Link](./docs/en_US/provider_scale_out.md)
|
68 |
+
- Add new models for existing providers: [Link](./docs/en_US/provider_scale_out.md#AddModel)
|
69 |
+
- View YAML configuration rules: [Link](./docs/en_US/schema.md)
|
70 |
+
- Implement interface methods: [Link](./docs/en_US/interfaces.md)
|
api/core/model_runtime/README_CN.md
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Model Runtime
|
2 |
+
|
3 |
+
该模块提供了各模型的调用、鉴权接口,并为 Dify 提供了统一的模型供应商的信息和凭据表单规则。
|
4 |
+
|
5 |
+
- 一方面将模型和上下游解耦,方便开发者对模型横向扩展,
|
6 |
+
- 另一方面提供了只需在后端定义供应商和模型,即可在前端页面直接展示,无需修改前端逻辑。
|
7 |
+
|
8 |
+
## 功能介绍
|
9 |
+
|
10 |
+
- 支持 5 种模型类型的能力调用
|
11 |
+
|
12 |
+
- `LLM` - LLM 文本补全、对话,预计算 tokens 能力
|
13 |
+
- `Text Embedidng Model` - 文本 Embedding ,预计算 tokens 能力
|
14 |
+
- `Rerank Model` - 分段 Rerank 能力
|
15 |
+
- `Speech-to-text Model` - 语音转文本能力
|
16 |
+
- `Text-to-speech Model` - 文本转语音能力
|
17 |
+
- `Moderation` - Moderation 能力
|
18 |
+
|
19 |
+
- 模型供应商展示
|
20 |
+
|
21 |
+

|
22 |
+
|
23 |
+
展示所有已支持的供应商列表,除了返回供应商名称、图标之外,还提供了支持的模型类型列表,预定义模型列表、配置方式以及配置凭据的表单规则等等,规则设计详见:[Schema](./docs/zh_Hans/schema.md)。
|
24 |
+
|
25 |
+
- 可选择的模型列表展示
|
26 |
+
|
27 |
+

|
28 |
+
|
29 |
+
配置供应商/模型凭据后,可在此下拉(应用编排界面/默认模型)查看可用的 LLM 列表,其中灰色的为未配置凭据供应商的预定义模型列表,方便用户查看已支持的模型。
|
30 |
+
|
31 |
+
除此之外,该列表还返回了 LLM 可配置的参数信息和规则,如下图:
|
32 |
+
|
33 |
+

|
34 |
+
|
35 |
+
这里的参数均为后端定义,相比之前只有 5 种固定参数,这里可为不同模型设置所支持的各种参数,详见:[Schema](./docs/zh_Hans/schema.md#ParameterRule)。
|
36 |
+
|
37 |
+
- 供应商/模型凭据鉴权
|
38 |
+
|
39 |
+

|
40 |
+
|
41 |
+

|
42 |
+
|
43 |
+
供应商列表返回了凭据表单的配置信息,可通过 Runtime 提供的接口对凭据进行鉴权,上图 1 为供应商凭据 DEMO,上图 2 为模型凭据 DEMO。
|
44 |
+
|
45 |
+
## 结构
|
46 |
+
|
47 |
+

|
48 |
+
|
49 |
+
Model Runtime 分三层:
|
50 |
+
|
51 |
+
- 最外层为工厂方法
|
52 |
+
|
53 |
+
提供获取所有供应商、所有模型列表、获取供应商实例、供应商/模型凭据鉴权方法。
|
54 |
+
|
55 |
+
- 第二层为供应商层
|
56 |
+
|
57 |
+
提供获取当前供应商模型列表、获取模型实例、供应商凭据鉴权、供应商配置规则信息,**可横向扩展**以支持不同的供应商。
|
58 |
+
|
59 |
+
对于供应商/模型凭据,有两种情况
|
60 |
+
- 如OpenAI这类中心化供应商,需要定义如**api_key**这类的鉴权凭据
|
61 |
+
- 如[**Xinference**](https://github.com/xorbitsai/inference)这类本地部署的供应商,需要定义如**server_url**这类的地址凭据,有时候还需要定义**model_uid**之类的模型类型凭据,就像下面这样,当在供应商层定义了这些凭据后,就可以在前端页面上直接展示,无需修改前端逻辑。
|
62 |
+

|
63 |
+
|
64 |
+
当配置好凭据后,就可以通过DifyRuntime的外部接口直接获取到对应供应商所需要的**Schema**(凭据表单规则),从而在可以在不修改前端逻辑的情况下,提供新的供应商/模型的支持。
|
65 |
+
|
66 |
+
- 最底层为模型层
|
67 |
+
|
68 |
+
提供各种模型类型的直接调用、预定义模型配置信息、获取预定义/远程模型列表、模型凭据鉴权方法,不同模型额外提供了特殊方法,如 LLM 提供预计算 tokens 方法、获取费用信息方法等,**可横向扩展**同供应商下不同的模型(支持的模型类型下)。
|
69 |
+
|
70 |
+
在这里我们需要先区分模型参数与模型凭据。
|
71 |
+
|
72 |
+
- 模型参数(**在本层定义**):这是一类经常需要变动,随时调整的参数,如 LLM 的 **max_tokens**、**temperature** 等,这些参数是由用户在前端页面上进行调整的,因此需要在后端定义参数的规则,以便前端页面进行展示和调整。在DifyRuntime中,他们的参数名一般为**model_parameters: dict[str, any]**。
|
73 |
+
|
74 |
+
- 模型凭据(**在供应商层定义**):这是一类不经常变动,一般在配置好后就不会再变动的参数,如 **api_key**、**server_url** 等。在DifyRuntime中,他们的参数名一般为**credentials: dict[str, any]**,Provider层的credentials会直接被传递到这一层,不需要再单独定义。
|
75 |
+
|
76 |
+
## 下一步
|
77 |
+
|
78 |
+
### [增加新的供应商配置 👈🏻](./docs/zh_Hans/provider_scale_out.md)
|
79 |
+
当添加后,这里将会出现一个新的供应商
|
80 |
+
|
81 |
+

|
82 |
+
|
83 |
+
### [为已存在的供应商新增模型 👈🏻](./docs/zh_Hans/provider_scale_out.md#增加模型)
|
84 |
+
当添加后,对应供应商的模型列表中将会出现一个新的预定义模型供用户选择,如GPT-3.5 GPT-4 ChatGLM3-6b等,而对于支持自定义模型的供应商,则不需要新增模型。
|
85 |
+
|
86 |
+

|
87 |
+
|
88 |
+
### [接口的具体实现 👈🏻](./docs/zh_Hans/interfaces.md)
|
89 |
+
你可以在这里找到你想要查看的接口的具体实现,以及接口的参数和返回值的具体含义。
|
api/core/model_runtime/__init__.py
ADDED
File without changes
|
api/core/model_runtime/model_providers/__base/__init__.py
ADDED
File without changes
|
api/core/model_runtime/model_providers/__base/ai_model.py
ADDED
@@ -0,0 +1,334 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import decimal
|
2 |
+
import os
|
3 |
+
from abc import ABC, abstractmethod
|
4 |
+
from typing import Optional
|
5 |
+
|
6 |
+
from pydantic import ConfigDict
|
7 |
+
|
8 |
+
from core.helper.position_helper import get_position_map, sort_by_position_map
|
9 |
+
from core.model_runtime.entities.common_entities import I18nObject
|
10 |
+
from core.model_runtime.entities.defaults import PARAMETER_RULE_TEMPLATE
|
11 |
+
from core.model_runtime.entities.model_entities import (
|
12 |
+
AIModelEntity,
|
13 |
+
DefaultParameterName,
|
14 |
+
FetchFrom,
|
15 |
+
ModelType,
|
16 |
+
PriceConfig,
|
17 |
+
PriceInfo,
|
18 |
+
PriceType,
|
19 |
+
)
|
20 |
+
from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError
|
21 |
+
from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer
|
22 |
+
from core.tools.utils.yaml_utils import load_yaml_file
|
23 |
+
|
24 |
+
|
25 |
+
class AIModel(ABC):
|
26 |
+
"""
|
27 |
+
Base class for all models.
|
28 |
+
"""
|
29 |
+
|
30 |
+
model_type: ModelType
|
31 |
+
model_schemas: Optional[list[AIModelEntity]] = None
|
32 |
+
started_at: float = 0
|
33 |
+
|
34 |
+
# pydantic configs
|
35 |
+
model_config = ConfigDict(protected_namespaces=())
|
36 |
+
|
37 |
+
@abstractmethod
|
38 |
+
def validate_credentials(self, model: str, credentials: dict) -> None:
|
39 |
+
"""
|
40 |
+
Validate model credentials
|
41 |
+
|
42 |
+
:param model: model name
|
43 |
+
:param credentials: model credentials
|
44 |
+
:return:
|
45 |
+
"""
|
46 |
+
raise NotImplementedError
|
47 |
+
|
48 |
+
@property
|
49 |
+
@abstractmethod
|
50 |
+
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
|
51 |
+
"""
|
52 |
+
Map model invoke error to unified error
|
53 |
+
The key is the error type thrown to the caller
|
54 |
+
The value is the error type thrown by the model,
|
55 |
+
which needs to be converted into a unified error type for the caller.
|
56 |
+
|
57 |
+
:return: Invoke error mapping
|
58 |
+
"""
|
59 |
+
raise NotImplementedError
|
60 |
+
|
61 |
+
def _transform_invoke_error(self, error: Exception) -> InvokeError:
|
62 |
+
"""
|
63 |
+
Transform invoke error to unified error
|
64 |
+
|
65 |
+
:param error: model invoke error
|
66 |
+
:return: unified error
|
67 |
+
"""
|
68 |
+
provider_name = self.__class__.__module__.split(".")[-3]
|
69 |
+
|
70 |
+
for invoke_error, model_errors in self._invoke_error_mapping.items():
|
71 |
+
if isinstance(error, tuple(model_errors)):
|
72 |
+
if invoke_error == InvokeAuthorizationError:
|
73 |
+
return invoke_error(
|
74 |
+
description=(
|
75 |
+
f"[{provider_name}] Incorrect model credentials provided, please check and try again."
|
76 |
+
)
|
77 |
+
)
|
78 |
+
|
79 |
+
return invoke_error(description=f"[{provider_name}] {invoke_error.description}, {str(error)}")
|
80 |
+
|
81 |
+
return InvokeError(description=f"[{provider_name}] Error: {str(error)}")
|
82 |
+
|
83 |
+
def get_price(self, model: str, credentials: dict, price_type: PriceType, tokens: int) -> PriceInfo:
|
84 |
+
"""
|
85 |
+
Get price for given model and tokens
|
86 |
+
|
87 |
+
:param model: model name
|
88 |
+
:param credentials: model credentials
|
89 |
+
:param price_type: price type
|
90 |
+
:param tokens: number of tokens
|
91 |
+
:return: price info
|
92 |
+
"""
|
93 |
+
# get model schema
|
94 |
+
model_schema = self.get_model_schema(model, credentials)
|
95 |
+
|
96 |
+
# get price info from predefined model schema
|
97 |
+
price_config: Optional[PriceConfig] = None
|
98 |
+
if model_schema and model_schema.pricing:
|
99 |
+
price_config = model_schema.pricing
|
100 |
+
|
101 |
+
# get unit price
|
102 |
+
unit_price = None
|
103 |
+
if price_config:
|
104 |
+
if price_type == PriceType.INPUT:
|
105 |
+
unit_price = price_config.input
|
106 |
+
elif price_type == PriceType.OUTPUT and price_config.output is not None:
|
107 |
+
unit_price = price_config.output
|
108 |
+
|
109 |
+
if unit_price is None:
|
110 |
+
return PriceInfo(
|
111 |
+
unit_price=decimal.Decimal("0.0"),
|
112 |
+
unit=decimal.Decimal("0.0"),
|
113 |
+
total_amount=decimal.Decimal("0.0"),
|
114 |
+
currency="USD",
|
115 |
+
)
|
116 |
+
|
117 |
+
# calculate total amount
|
118 |
+
if not price_config:
|
119 |
+
raise ValueError(f"Price config not found for model {model}")
|
120 |
+
total_amount = tokens * unit_price * price_config.unit
|
121 |
+
total_amount = total_amount.quantize(decimal.Decimal("0.0000001"), rounding=decimal.ROUND_HALF_UP)
|
122 |
+
|
123 |
+
return PriceInfo(
|
124 |
+
unit_price=unit_price,
|
125 |
+
unit=price_config.unit,
|
126 |
+
total_amount=total_amount,
|
127 |
+
currency=price_config.currency,
|
128 |
+
)
|
129 |
+
|
130 |
+
def predefined_models(self) -> list[AIModelEntity]:
|
131 |
+
"""
|
132 |
+
Get all predefined models for given provider.
|
133 |
+
|
134 |
+
:return:
|
135 |
+
"""
|
136 |
+
if self.model_schemas:
|
137 |
+
return self.model_schemas
|
138 |
+
|
139 |
+
model_schemas = []
|
140 |
+
|
141 |
+
# get module name
|
142 |
+
model_type = self.__class__.__module__.split(".")[-1]
|
143 |
+
|
144 |
+
# get provider name
|
145 |
+
provider_name = self.__class__.__module__.split(".")[-3]
|
146 |
+
|
147 |
+
# get the path of current classes
|
148 |
+
current_path = os.path.abspath(__file__)
|
149 |
+
# get parent path of the current path
|
150 |
+
provider_model_type_path = os.path.join(
|
151 |
+
os.path.dirname(os.path.dirname(current_path)), provider_name, model_type
|
152 |
+
)
|
153 |
+
|
154 |
+
# get all yaml files path under provider_model_type_path that do not start with __
|
155 |
+
model_schema_yaml_paths = [
|
156 |
+
os.path.join(provider_model_type_path, model_schema_yaml)
|
157 |
+
for model_schema_yaml in os.listdir(provider_model_type_path)
|
158 |
+
if not model_schema_yaml.startswith("__")
|
159 |
+
and not model_schema_yaml.startswith("_")
|
160 |
+
and os.path.isfile(os.path.join(provider_model_type_path, model_schema_yaml))
|
161 |
+
and model_schema_yaml.endswith(".yaml")
|
162 |
+
]
|
163 |
+
|
164 |
+
# get _position.yaml file path
|
165 |
+
position_map = get_position_map(provider_model_type_path)
|
166 |
+
|
167 |
+
# traverse all model_schema_yaml_paths
|
168 |
+
for model_schema_yaml_path in model_schema_yaml_paths:
|
169 |
+
# read yaml data from yaml file
|
170 |
+
yaml_data = load_yaml_file(model_schema_yaml_path)
|
171 |
+
|
172 |
+
new_parameter_rules = []
|
173 |
+
for parameter_rule in yaml_data.get("parameter_rules", []):
|
174 |
+
if "use_template" in parameter_rule:
|
175 |
+
try:
|
176 |
+
default_parameter_name = DefaultParameterName.value_of(parameter_rule["use_template"])
|
177 |
+
default_parameter_rule = self._get_default_parameter_rule_variable_map(default_parameter_name)
|
178 |
+
copy_default_parameter_rule = default_parameter_rule.copy()
|
179 |
+
copy_default_parameter_rule.update(parameter_rule)
|
180 |
+
parameter_rule = copy_default_parameter_rule
|
181 |
+
except ValueError:
|
182 |
+
pass
|
183 |
+
|
184 |
+
if "label" not in parameter_rule:
|
185 |
+
parameter_rule["label"] = {"zh_Hans": parameter_rule["name"], "en_US": parameter_rule["name"]}
|
186 |
+
|
187 |
+
new_parameter_rules.append(parameter_rule)
|
188 |
+
|
189 |
+
yaml_data["parameter_rules"] = new_parameter_rules
|
190 |
+
|
191 |
+
if "label" not in yaml_data:
|
192 |
+
yaml_data["label"] = {"zh_Hans": yaml_data["model"], "en_US": yaml_data["model"]}
|
193 |
+
|
194 |
+
yaml_data["fetch_from"] = FetchFrom.PREDEFINED_MODEL.value
|
195 |
+
|
196 |
+
try:
|
197 |
+
# yaml_data to entity
|
198 |
+
model_schema = AIModelEntity(**yaml_data)
|
199 |
+
except Exception as e:
|
200 |
+
model_schema_yaml_file_name = os.path.basename(model_schema_yaml_path).rstrip(".yaml")
|
201 |
+
raise Exception(
|
202 |
+
f"Invalid model schema for {provider_name}.{model_type}.{model_schema_yaml_file_name}: {str(e)}"
|
203 |
+
)
|
204 |
+
|
205 |
+
# cache model schema
|
206 |
+
model_schemas.append(model_schema)
|
207 |
+
|
208 |
+
# resort model schemas by position
|
209 |
+
model_schemas = sort_by_position_map(position_map, model_schemas, lambda x: x.model)
|
210 |
+
|
211 |
+
# cache model schemas
|
212 |
+
self.model_schemas = model_schemas
|
213 |
+
|
214 |
+
return model_schemas
|
215 |
+
|
216 |
+
def get_model_schema(self, model: str, credentials: Optional[dict] = None) -> Optional[AIModelEntity]:
|
217 |
+
"""
|
218 |
+
Get model schema by model name and credentials
|
219 |
+
|
220 |
+
:param model: model name
|
221 |
+
:param credentials: model credentials
|
222 |
+
:return: model schema
|
223 |
+
"""
|
224 |
+
# Try to get model schema from predefined models
|
225 |
+
for predefined_model in self.predefined_models():
|
226 |
+
if model == predefined_model.model:
|
227 |
+
return predefined_model
|
228 |
+
|
229 |
+
# Try to get model schema from credentials
|
230 |
+
if credentials:
|
231 |
+
model_schema = self.get_customizable_model_schema_from_credentials(model, credentials)
|
232 |
+
if model_schema:
|
233 |
+
return model_schema
|
234 |
+
|
235 |
+
return None
|
236 |
+
|
237 |
+
def get_customizable_model_schema_from_credentials(self, model: str, credentials: dict) -> Optional[AIModelEntity]:
|
238 |
+
"""
|
239 |
+
Get customizable model schema from credentials
|
240 |
+
|
241 |
+
:param model: model name
|
242 |
+
:param credentials: model credentials
|
243 |
+
:return: model schema
|
244 |
+
"""
|
245 |
+
return self._get_customizable_model_schema(model, credentials)
|
246 |
+
|
247 |
+
def _get_customizable_model_schema(self, model: str, credentials: dict) -> Optional[AIModelEntity]:
|
248 |
+
"""
|
249 |
+
Get customizable model schema and fill in the template
|
250 |
+
"""
|
251 |
+
schema = self.get_customizable_model_schema(model, credentials)
|
252 |
+
|
253 |
+
if not schema:
|
254 |
+
return None
|
255 |
+
|
256 |
+
# fill in the template
|
257 |
+
new_parameter_rules = []
|
258 |
+
for parameter_rule in schema.parameter_rules:
|
259 |
+
if parameter_rule.use_template:
|
260 |
+
try:
|
261 |
+
default_parameter_name = DefaultParameterName.value_of(parameter_rule.use_template)
|
262 |
+
default_parameter_rule = self._get_default_parameter_rule_variable_map(default_parameter_name)
|
263 |
+
if not parameter_rule.max and "max" in default_parameter_rule:
|
264 |
+
parameter_rule.max = default_parameter_rule["max"]
|
265 |
+
if not parameter_rule.min and "min" in default_parameter_rule:
|
266 |
+
parameter_rule.min = default_parameter_rule["min"]
|
267 |
+
if not parameter_rule.default and "default" in default_parameter_rule:
|
268 |
+
parameter_rule.default = default_parameter_rule["default"]
|
269 |
+
if not parameter_rule.precision and "precision" in default_parameter_rule:
|
270 |
+
parameter_rule.precision = default_parameter_rule["precision"]
|
271 |
+
if not parameter_rule.required and "required" in default_parameter_rule:
|
272 |
+
parameter_rule.required = default_parameter_rule["required"]
|
273 |
+
if not parameter_rule.help and "help" in default_parameter_rule:
|
274 |
+
parameter_rule.help = I18nObject(
|
275 |
+
en_US=default_parameter_rule["help"]["en_US"],
|
276 |
+
)
|
277 |
+
if (
|
278 |
+
parameter_rule.help
|
279 |
+
and not parameter_rule.help.en_US
|
280 |
+
and ("help" in default_parameter_rule and "en_US" in default_parameter_rule["help"])
|
281 |
+
):
|
282 |
+
parameter_rule.help.en_US = default_parameter_rule["help"]["en_US"]
|
283 |
+
if (
|
284 |
+
parameter_rule.help
|
285 |
+
and not parameter_rule.help.zh_Hans
|
286 |
+
and ("help" in default_parameter_rule and "zh_Hans" in default_parameter_rule["help"])
|
287 |
+
):
|
288 |
+
parameter_rule.help.zh_Hans = default_parameter_rule["help"].get(
|
289 |
+
"zh_Hans", default_parameter_rule["help"]["en_US"]
|
290 |
+
)
|
291 |
+
except ValueError:
|
292 |
+
pass
|
293 |
+
|
294 |
+
new_parameter_rules.append(parameter_rule)
|
295 |
+
|
296 |
+
schema.parameter_rules = new_parameter_rules
|
297 |
+
|
298 |
+
return schema
|
299 |
+
|
300 |
+
def get_customizable_model_schema(self, model: str, credentials: dict) -> Optional[AIModelEntity]:
|
301 |
+
"""
|
302 |
+
Get customizable model schema
|
303 |
+
|
304 |
+
:param model: model name
|
305 |
+
:param credentials: model credentials
|
306 |
+
:return: model schema
|
307 |
+
"""
|
308 |
+
return None
|
309 |
+
|
310 |
+
def _get_default_parameter_rule_variable_map(self, name: DefaultParameterName) -> dict:
|
311 |
+
"""
|
312 |
+
Get default parameter rule for given name
|
313 |
+
|
314 |
+
:param name: parameter name
|
315 |
+
:return: parameter rule
|
316 |
+
"""
|
317 |
+
default_parameter_rule = PARAMETER_RULE_TEMPLATE.get(name)
|
318 |
+
|
319 |
+
if not default_parameter_rule:
|
320 |
+
raise Exception(f"Invalid model parameter rule name {name}")
|
321 |
+
|
322 |
+
return default_parameter_rule
|
323 |
+
|
324 |
+
def _get_num_tokens_by_gpt2(self, text: str) -> int:
|
325 |
+
"""
|
326 |
+
Get number of tokens for given prompt messages by gpt2
|
327 |
+
Some provider models do not provide an interface for obtaining the number of tokens.
|
328 |
+
Here, the gpt2 tokenizer is used to calculate the number of tokens.
|
329 |
+
This method can be executed offline, and the gpt2 tokenizer has been cached in the project.
|
330 |
+
|
331 |
+
:param text: plain text of prompt. You need to convert the original message to plain text
|
332 |
+
:return: number of tokens
|
333 |
+
"""
|
334 |
+
return GPT2Tokenizer.get_num_tokens(text)
|
api/core/model_runtime/model_providers/__base/audio.mp3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29b714073410fefc10ecb80526b5c7c33df73b0830ff0e7778d5065a6cfcae3e
|
3 |
+
size 218880
|
api/core/model_runtime/model_providers/__base/large_language_model.py
ADDED
@@ -0,0 +1,904 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import re
|
3 |
+
import time
|
4 |
+
from abc import abstractmethod
|
5 |
+
from collections.abc import Generator, Sequence
|
6 |
+
from typing import Optional, Union
|
7 |
+
|
8 |
+
from pydantic import ConfigDict
|
9 |
+
|
10 |
+
from configs import dify_config
|
11 |
+
from core.model_runtime.callbacks.base_callback import Callback
|
12 |
+
from core.model_runtime.callbacks.logging_callback import LoggingCallback
|
13 |
+
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
|
14 |
+
from core.model_runtime.entities.message_entities import (
|
15 |
+
AssistantPromptMessage,
|
16 |
+
PromptMessage,
|
17 |
+
PromptMessageContentType,
|
18 |
+
PromptMessageTool,
|
19 |
+
SystemPromptMessage,
|
20 |
+
UserPromptMessage,
|
21 |
+
)
|
22 |
+
from core.model_runtime.entities.model_entities import (
|
23 |
+
ModelPropertyKey,
|
24 |
+
ModelType,
|
25 |
+
ParameterRule,
|
26 |
+
ParameterType,
|
27 |
+
PriceType,
|
28 |
+
)
|
29 |
+
from core.model_runtime.model_providers.__base.ai_model import AIModel
|
30 |
+
|
31 |
+
logger = logging.getLogger(__name__)
|
32 |
+
|
33 |
+
|
34 |
+
class LargeLanguageModel(AIModel):
|
35 |
+
"""
|
36 |
+
Model class for large language model.
|
37 |
+
"""
|
38 |
+
|
39 |
+
model_type: ModelType = ModelType.LLM
|
40 |
+
|
41 |
+
# pydantic configs
|
42 |
+
model_config = ConfigDict(protected_namespaces=())
|
43 |
+
|
44 |
+
def invoke(
|
45 |
+
self,
|
46 |
+
model: str,
|
47 |
+
credentials: dict,
|
48 |
+
prompt_messages: list[PromptMessage],
|
49 |
+
model_parameters: Optional[dict] = None,
|
50 |
+
tools: Optional[list[PromptMessageTool]] = None,
|
51 |
+
stop: Optional[list[str]] = None,
|
52 |
+
stream: bool = True,
|
53 |
+
user: Optional[str] = None,
|
54 |
+
callbacks: Optional[list[Callback]] = None,
|
55 |
+
) -> Union[LLMResult, Generator]:
|
56 |
+
"""
|
57 |
+
Invoke large language model
|
58 |
+
|
59 |
+
:param model: model name
|
60 |
+
:param credentials: model credentials
|
61 |
+
:param prompt_messages: prompt messages
|
62 |
+
:param model_parameters: model parameters
|
63 |
+
:param tools: tools for tool calling
|
64 |
+
:param stop: stop words
|
65 |
+
:param stream: is stream response
|
66 |
+
:param user: unique user id
|
67 |
+
:param callbacks: callbacks
|
68 |
+
:return: full response or stream response chunk generator result
|
69 |
+
"""
|
70 |
+
# validate and filter model parameters
|
71 |
+
if model_parameters is None:
|
72 |
+
model_parameters = {}
|
73 |
+
|
74 |
+
model_parameters = self._validate_and_filter_model_parameters(model, model_parameters, credentials)
|
75 |
+
|
76 |
+
self.started_at = time.perf_counter()
|
77 |
+
|
78 |
+
callbacks = callbacks or []
|
79 |
+
|
80 |
+
if dify_config.DEBUG:
|
81 |
+
callbacks.append(LoggingCallback())
|
82 |
+
|
83 |
+
# trigger before invoke callbacks
|
84 |
+
self._trigger_before_invoke_callbacks(
|
85 |
+
model=model,
|
86 |
+
credentials=credentials,
|
87 |
+
prompt_messages=prompt_messages,
|
88 |
+
model_parameters=model_parameters,
|
89 |
+
tools=tools,
|
90 |
+
stop=stop,
|
91 |
+
stream=stream,
|
92 |
+
user=user,
|
93 |
+
callbacks=callbacks,
|
94 |
+
)
|
95 |
+
|
96 |
+
try:
|
97 |
+
if "response_format" in model_parameters and model_parameters["response_format"] in {"JSON", "XML"}:
|
98 |
+
result = self._code_block_mode_wrapper(
|
99 |
+
model=model,
|
100 |
+
credentials=credentials,
|
101 |
+
prompt_messages=prompt_messages,
|
102 |
+
model_parameters=model_parameters,
|
103 |
+
tools=tools,
|
104 |
+
stop=stop,
|
105 |
+
stream=stream,
|
106 |
+
user=user,
|
107 |
+
callbacks=callbacks,
|
108 |
+
)
|
109 |
+
else:
|
110 |
+
result = self._invoke(
|
111 |
+
model=model,
|
112 |
+
credentials=credentials,
|
113 |
+
prompt_messages=prompt_messages,
|
114 |
+
model_parameters=model_parameters,
|
115 |
+
tools=tools,
|
116 |
+
stop=stop,
|
117 |
+
stream=stream,
|
118 |
+
user=user,
|
119 |
+
)
|
120 |
+
except Exception as e:
|
121 |
+
self._trigger_invoke_error_callbacks(
|
122 |
+
model=model,
|
123 |
+
ex=e,
|
124 |
+
credentials=credentials,
|
125 |
+
prompt_messages=prompt_messages,
|
126 |
+
model_parameters=model_parameters,
|
127 |
+
tools=tools,
|
128 |
+
stop=stop,
|
129 |
+
stream=stream,
|
130 |
+
user=user,
|
131 |
+
callbacks=callbacks,
|
132 |
+
)
|
133 |
+
|
134 |
+
raise self._transform_invoke_error(e)
|
135 |
+
|
136 |
+
if stream and isinstance(result, Generator):
|
137 |
+
return self._invoke_result_generator(
|
138 |
+
model=model,
|
139 |
+
result=result,
|
140 |
+
credentials=credentials,
|
141 |
+
prompt_messages=prompt_messages,
|
142 |
+
model_parameters=model_parameters,
|
143 |
+
tools=tools,
|
144 |
+
stop=stop,
|
145 |
+
stream=stream,
|
146 |
+
user=user,
|
147 |
+
callbacks=callbacks,
|
148 |
+
)
|
149 |
+
elif isinstance(result, LLMResult):
|
150 |
+
self._trigger_after_invoke_callbacks(
|
151 |
+
model=model,
|
152 |
+
result=result,
|
153 |
+
credentials=credentials,
|
154 |
+
prompt_messages=prompt_messages,
|
155 |
+
model_parameters=model_parameters,
|
156 |
+
tools=tools,
|
157 |
+
stop=stop,
|
158 |
+
stream=stream,
|
159 |
+
user=user,
|
160 |
+
callbacks=callbacks,
|
161 |
+
)
|
162 |
+
|
163 |
+
return result
|
164 |
+
|
165 |
+
def _code_block_mode_wrapper(
|
166 |
+
self,
|
167 |
+
model: str,
|
168 |
+
credentials: dict,
|
169 |
+
prompt_messages: list[PromptMessage],
|
170 |
+
model_parameters: dict,
|
171 |
+
tools: Optional[list[PromptMessageTool]] = None,
|
172 |
+
stop: Optional[Sequence[str]] = None,
|
173 |
+
stream: bool = True,
|
174 |
+
user: Optional[str] = None,
|
175 |
+
callbacks: Optional[list[Callback]] = None,
|
176 |
+
) -> Union[LLMResult, Generator]:
|
177 |
+
"""
|
178 |
+
Code block mode wrapper, ensure the response is a code block with output markdown quote
|
179 |
+
|
180 |
+
:param model: model name
|
181 |
+
:param credentials: model credentials
|
182 |
+
:param prompt_messages: prompt messages
|
183 |
+
:param model_parameters: model parameters
|
184 |
+
:param tools: tools for tool calling
|
185 |
+
:param stop: stop words
|
186 |
+
:param stream: is stream response
|
187 |
+
:param user: unique user id
|
188 |
+
:param callbacks: callbacks
|
189 |
+
:return: full response or stream response chunk generator result
|
190 |
+
"""
|
191 |
+
|
192 |
+
block_prompts = """You should always follow the instructions and output a valid {{block}} object.
|
193 |
+
The structure of the {{block}} object you can found in the instructions, use {"answer": "$your_answer"} as the default structure
|
194 |
+
if you are not sure about the structure.
|
195 |
+
|
196 |
+
<instructions>
|
197 |
+
{{instructions}}
|
198 |
+
</instructions>
|
199 |
+
""" # noqa: E501
|
200 |
+
|
201 |
+
code_block = model_parameters.get("response_format", "")
|
202 |
+
if not code_block:
|
203 |
+
return self._invoke(
|
204 |
+
model=model,
|
205 |
+
credentials=credentials,
|
206 |
+
prompt_messages=prompt_messages,
|
207 |
+
model_parameters=model_parameters,
|
208 |
+
tools=tools,
|
209 |
+
stop=stop,
|
210 |
+
stream=stream,
|
211 |
+
user=user,
|
212 |
+
)
|
213 |
+
|
214 |
+
model_parameters.pop("response_format")
|
215 |
+
stop = list(stop) if stop is not None else []
|
216 |
+
stop.extend(["\n```", "```\n"])
|
217 |
+
block_prompts = block_prompts.replace("{{block}}", code_block)
|
218 |
+
|
219 |
+
# check if there is a system message
|
220 |
+
if len(prompt_messages) > 0 and isinstance(prompt_messages[0], SystemPromptMessage):
|
221 |
+
# override the system message
|
222 |
+
prompt_messages[0] = SystemPromptMessage(
|
223 |
+
content=block_prompts.replace("{{instructions}}", str(prompt_messages[0].content))
|
224 |
+
)
|
225 |
+
else:
|
226 |
+
# insert the system message
|
227 |
+
prompt_messages.insert(
|
228 |
+
0,
|
229 |
+
SystemPromptMessage(
|
230 |
+
content=block_prompts.replace("{{instructions}}", f"Please output a valid {code_block} object.")
|
231 |
+
),
|
232 |
+
)
|
233 |
+
|
234 |
+
if len(prompt_messages) > 0 and isinstance(prompt_messages[-1], UserPromptMessage):
|
235 |
+
# add ```JSON\n to the last text message
|
236 |
+
if isinstance(prompt_messages[-1].content, str):
|
237 |
+
prompt_messages[-1].content += f"\n```{code_block}\n"
|
238 |
+
elif isinstance(prompt_messages[-1].content, list):
|
239 |
+
for i in range(len(prompt_messages[-1].content) - 1, -1, -1):
|
240 |
+
if prompt_messages[-1].content[i].type == PromptMessageContentType.TEXT:
|
241 |
+
prompt_messages[-1].content[i].data += f"\n```{code_block}\n"
|
242 |
+
break
|
243 |
+
else:
|
244 |
+
# append a user message
|
245 |
+
prompt_messages.append(UserPromptMessage(content=f"```{code_block}\n"))
|
246 |
+
|
247 |
+
response = self._invoke(
|
248 |
+
model=model,
|
249 |
+
credentials=credentials,
|
250 |
+
prompt_messages=prompt_messages,
|
251 |
+
model_parameters=model_parameters,
|
252 |
+
tools=tools,
|
253 |
+
stop=stop,
|
254 |
+
stream=stream,
|
255 |
+
user=user,
|
256 |
+
)
|
257 |
+
|
258 |
+
if isinstance(response, Generator):
|
259 |
+
first_chunk = next(response)
|
260 |
+
|
261 |
+
def new_generator():
|
262 |
+
yield first_chunk
|
263 |
+
yield from response
|
264 |
+
|
265 |
+
if first_chunk.delta.message.content and first_chunk.delta.message.content.startswith("`"):
|
266 |
+
return self._code_block_mode_stream_processor_with_backtick(
|
267 |
+
model=model, prompt_messages=prompt_messages, input_generator=new_generator()
|
268 |
+
)
|
269 |
+
else:
|
270 |
+
return self._code_block_mode_stream_processor(
|
271 |
+
model=model, prompt_messages=prompt_messages, input_generator=new_generator()
|
272 |
+
)
|
273 |
+
|
274 |
+
return response
|
275 |
+
|
276 |
+
def _code_block_mode_stream_processor(
|
277 |
+
self, model: str, prompt_messages: list[PromptMessage], input_generator: Generator[LLMResultChunk, None, None]
|
278 |
+
) -> Generator[LLMResultChunk, None, None]:
|
279 |
+
"""
|
280 |
+
Code block mode stream processor, ensure the response is a code block with output markdown quote
|
281 |
+
|
282 |
+
:param model: model name
|
283 |
+
:param prompt_messages: prompt messages
|
284 |
+
:param input_generator: input generator
|
285 |
+
:return: output generator
|
286 |
+
"""
|
287 |
+
state = "normal"
|
288 |
+
backtick_count = 0
|
289 |
+
for piece in input_generator:
|
290 |
+
if piece.delta.message.content:
|
291 |
+
content = piece.delta.message.content
|
292 |
+
piece.delta.message.content = ""
|
293 |
+
yield piece
|
294 |
+
content_piece = content
|
295 |
+
else:
|
296 |
+
yield piece
|
297 |
+
continue
|
298 |
+
new_piece: str = ""
|
299 |
+
for char in content_piece:
|
300 |
+
char = str(char)
|
301 |
+
if state == "normal":
|
302 |
+
if char == "`":
|
303 |
+
state = "in_backticks"
|
304 |
+
backtick_count = 1
|
305 |
+
else:
|
306 |
+
new_piece += char
|
307 |
+
elif state == "in_backticks":
|
308 |
+
if char == "`":
|
309 |
+
backtick_count += 1
|
310 |
+
if backtick_count == 3:
|
311 |
+
state = "skip_content"
|
312 |
+
backtick_count = 0
|
313 |
+
else:
|
314 |
+
new_piece += "`" * backtick_count + char
|
315 |
+
state = "normal"
|
316 |
+
backtick_count = 0
|
317 |
+
elif state == "skip_content":
|
318 |
+
if char.isspace():
|
319 |
+
state = "normal"
|
320 |
+
|
321 |
+
if new_piece:
|
322 |
+
yield LLMResultChunk(
|
323 |
+
model=model,
|
324 |
+
prompt_messages=prompt_messages,
|
325 |
+
delta=LLMResultChunkDelta(
|
326 |
+
index=0,
|
327 |
+
message=AssistantPromptMessage(content=new_piece, tool_calls=[]),
|
328 |
+
),
|
329 |
+
)
|
330 |
+
|
331 |
+
def _code_block_mode_stream_processor_with_backtick(
|
332 |
+
self, model: str, prompt_messages: list, input_generator: Generator[LLMResultChunk, None, None]
|
333 |
+
) -> Generator[LLMResultChunk, None, None]:
|
334 |
+
"""
|
335 |
+
Code block mode stream processor, ensure the response is a code block with output markdown quote.
|
336 |
+
This version skips the language identifier that follows the opening triple backticks.
|
337 |
+
|
338 |
+
:param model: model name
|
339 |
+
:param prompt_messages: prompt messages
|
340 |
+
:param input_generator: input generator
|
341 |
+
:return: output generator
|
342 |
+
"""
|
343 |
+
state = "search_start"
|
344 |
+
backtick_count = 0
|
345 |
+
|
346 |
+
for piece in input_generator:
|
347 |
+
if piece.delta.message.content:
|
348 |
+
content = piece.delta.message.content
|
349 |
+
# Reset content to ensure we're only processing and yielding the relevant parts
|
350 |
+
piece.delta.message.content = ""
|
351 |
+
# Yield a piece with cleared content before processing it to maintain the generator structure
|
352 |
+
yield piece
|
353 |
+
content_piece = content
|
354 |
+
else:
|
355 |
+
# Yield pieces without content directly
|
356 |
+
yield piece
|
357 |
+
continue
|
358 |
+
|
359 |
+
if state == "done":
|
360 |
+
continue
|
361 |
+
|
362 |
+
new_piece: str = ""
|
363 |
+
for char in content_piece:
|
364 |
+
if state == "search_start":
|
365 |
+
if char == "`":
|
366 |
+
backtick_count += 1
|
367 |
+
if backtick_count == 3:
|
368 |
+
state = "skip_language"
|
369 |
+
backtick_count = 0
|
370 |
+
else:
|
371 |
+
backtick_count = 0
|
372 |
+
elif state == "skip_language":
|
373 |
+
# Skip everything until the first newline, marking the end of the language identifier
|
374 |
+
if char == "\n":
|
375 |
+
state = "in_code_block"
|
376 |
+
elif state == "in_code_block":
|
377 |
+
if char == "`":
|
378 |
+
backtick_count += 1
|
379 |
+
if backtick_count == 3:
|
380 |
+
state = "done"
|
381 |
+
break
|
382 |
+
else:
|
383 |
+
if backtick_count > 0:
|
384 |
+
# If backticks were counted but we're still collecting content, it was a false start
|
385 |
+
new_piece += "`" * backtick_count
|
386 |
+
backtick_count = 0
|
387 |
+
new_piece += str(char)
|
388 |
+
|
389 |
+
elif state == "done":
|
390 |
+
break
|
391 |
+
|
392 |
+
if new_piece:
|
393 |
+
# Only yield content collected within the code block
|
394 |
+
yield LLMResultChunk(
|
395 |
+
model=model,
|
396 |
+
prompt_messages=prompt_messages,
|
397 |
+
delta=LLMResultChunkDelta(
|
398 |
+
index=0,
|
399 |
+
message=AssistantPromptMessage(content=new_piece, tool_calls=[]),
|
400 |
+
),
|
401 |
+
)
|
402 |
+
|
403 |
+
def _wrap_thinking_by_reasoning_content(self, delta: dict, is_reasoning: bool) -> tuple[str, bool]:
|
404 |
+
"""
|
405 |
+
If the reasoning response is from delta.get("reasoning_content"), we wrap
|
406 |
+
it with HTML think tag.
|
407 |
+
|
408 |
+
:param delta: delta dictionary from LLM streaming response
|
409 |
+
:param is_reasoning: is reasoning
|
410 |
+
:return: tuple of (processed_content, is_reasoning)
|
411 |
+
"""
|
412 |
+
|
413 |
+
content = delta.get("content") or ""
|
414 |
+
reasoning_content = delta.get("reasoning_content")
|
415 |
+
|
416 |
+
if reasoning_content:
|
417 |
+
if not is_reasoning:
|
418 |
+
content = "<think>\n" + reasoning_content
|
419 |
+
is_reasoning = True
|
420 |
+
else:
|
421 |
+
content = reasoning_content
|
422 |
+
elif is_reasoning and content:
|
423 |
+
# do not end reasoning when content is empty
|
424 |
+
# there may be more reasoning_content later that follows previous reasoning closely
|
425 |
+
content = "\n</think>" + content
|
426 |
+
is_reasoning = False
|
427 |
+
return content, is_reasoning
|
428 |
+
|
429 |
+
def _invoke_result_generator(
|
430 |
+
self,
|
431 |
+
model: str,
|
432 |
+
result: Generator,
|
433 |
+
credentials: dict,
|
434 |
+
prompt_messages: list[PromptMessage],
|
435 |
+
model_parameters: dict,
|
436 |
+
tools: Optional[list[PromptMessageTool]] = None,
|
437 |
+
stop: Optional[Sequence[str]] = None,
|
438 |
+
stream: bool = True,
|
439 |
+
user: Optional[str] = None,
|
440 |
+
callbacks: Optional[list[Callback]] = None,
|
441 |
+
) -> Generator:
|
442 |
+
"""
|
443 |
+
Invoke result generator
|
444 |
+
|
445 |
+
:param result: result generator
|
446 |
+
:return: result generator
|
447 |
+
"""
|
448 |
+
callbacks = callbacks or []
|
449 |
+
prompt_message = AssistantPromptMessage(content="")
|
450 |
+
usage = None
|
451 |
+
system_fingerprint = None
|
452 |
+
real_model = model
|
453 |
+
|
454 |
+
try:
|
455 |
+
for chunk in result:
|
456 |
+
yield chunk
|
457 |
+
|
458 |
+
self._trigger_new_chunk_callbacks(
|
459 |
+
chunk=chunk,
|
460 |
+
model=model,
|
461 |
+
credentials=credentials,
|
462 |
+
prompt_messages=prompt_messages,
|
463 |
+
model_parameters=model_parameters,
|
464 |
+
tools=tools,
|
465 |
+
stop=stop,
|
466 |
+
stream=stream,
|
467 |
+
user=user,
|
468 |
+
callbacks=callbacks,
|
469 |
+
)
|
470 |
+
|
471 |
+
prompt_message.content += chunk.delta.message.content
|
472 |
+
real_model = chunk.model
|
473 |
+
if chunk.delta.usage:
|
474 |
+
usage = chunk.delta.usage
|
475 |
+
|
476 |
+
if chunk.system_fingerprint:
|
477 |
+
system_fingerprint = chunk.system_fingerprint
|
478 |
+
except Exception as e:
|
479 |
+
raise self._transform_invoke_error(e)
|
480 |
+
|
481 |
+
self._trigger_after_invoke_callbacks(
|
482 |
+
model=model,
|
483 |
+
result=LLMResult(
|
484 |
+
model=real_model,
|
485 |
+
prompt_messages=prompt_messages,
|
486 |
+
message=prompt_message,
|
487 |
+
usage=usage or LLMUsage.empty_usage(),
|
488 |
+
system_fingerprint=system_fingerprint,
|
489 |
+
),
|
490 |
+
credentials=credentials,
|
491 |
+
prompt_messages=prompt_messages,
|
492 |
+
model_parameters=model_parameters,
|
493 |
+
tools=tools,
|
494 |
+
stop=stop,
|
495 |
+
stream=stream,
|
496 |
+
user=user,
|
497 |
+
callbacks=callbacks,
|
498 |
+
)
|
499 |
+
|
500 |
+
@abstractmethod
|
501 |
+
def _invoke(
|
502 |
+
self,
|
503 |
+
model: str,
|
504 |
+
credentials: dict,
|
505 |
+
prompt_messages: list[PromptMessage],
|
506 |
+
model_parameters: dict,
|
507 |
+
tools: Optional[list[PromptMessageTool]] = None,
|
508 |
+
stop: Optional[Sequence[str]] = None,
|
509 |
+
stream: bool = True,
|
510 |
+
user: Optional[str] = None,
|
511 |
+
) -> Union[LLMResult, Generator]:
|
512 |
+
"""
|
513 |
+
Invoke large language model
|
514 |
+
|
515 |
+
:param model: model name
|
516 |
+
:param credentials: model credentials
|
517 |
+
:param prompt_messages: prompt messages
|
518 |
+
:param model_parameters: model parameters
|
519 |
+
:param tools: tools for tool calling
|
520 |
+
:param stop: stop words
|
521 |
+
:param stream: is stream response
|
522 |
+
:param user: unique user id
|
523 |
+
:return: full response or stream response chunk generator result
|
524 |
+
"""
|
525 |
+
raise NotImplementedError
|
526 |
+
|
527 |
+
@abstractmethod
|
528 |
+
def get_num_tokens(
|
529 |
+
self,
|
530 |
+
model: str,
|
531 |
+
credentials: dict,
|
532 |
+
prompt_messages: list[PromptMessage],
|
533 |
+
tools: Optional[list[PromptMessageTool]] = None,
|
534 |
+
) -> int:
|
535 |
+
"""
|
536 |
+
Get number of tokens for given prompt messages
|
537 |
+
|
538 |
+
:param model: model name
|
539 |
+
:param credentials: model credentials
|
540 |
+
:param prompt_messages: prompt messages
|
541 |
+
:param tools: tools for tool calling
|
542 |
+
:return:
|
543 |
+
"""
|
544 |
+
raise NotImplementedError
|
545 |
+
|
546 |
+
def enforce_stop_tokens(self, text: str, stop: list[str]) -> str:
|
547 |
+
"""Cut off the text as soon as any stop words occur."""
|
548 |
+
return re.split("|".join(stop), text, maxsplit=1)[0]
|
549 |
+
|
550 |
+
def get_parameter_rules(self, model: str, credentials: dict) -> list[ParameterRule]:
|
551 |
+
"""
|
552 |
+
Get parameter rules
|
553 |
+
|
554 |
+
:param model: model name
|
555 |
+
:param credentials: model credentials
|
556 |
+
:return: parameter rules
|
557 |
+
"""
|
558 |
+
model_schema = self.get_model_schema(model, credentials)
|
559 |
+
if model_schema:
|
560 |
+
return model_schema.parameter_rules
|
561 |
+
|
562 |
+
return []
|
563 |
+
|
564 |
+
def get_model_mode(self, model: str, credentials: Optional[dict] = None) -> LLMMode:
|
565 |
+
"""
|
566 |
+
Get model mode
|
567 |
+
|
568 |
+
:param model: model name
|
569 |
+
:param credentials: model credentials
|
570 |
+
:return: model mode
|
571 |
+
"""
|
572 |
+
model_schema = self.get_model_schema(model, credentials)
|
573 |
+
|
574 |
+
mode = LLMMode.CHAT
|
575 |
+
if model_schema and model_schema.model_properties.get(ModelPropertyKey.MODE):
|
576 |
+
mode = LLMMode.value_of(model_schema.model_properties[ModelPropertyKey.MODE])
|
577 |
+
|
578 |
+
return mode
|
579 |
+
|
580 |
+
def _calc_response_usage(
|
581 |
+
self, model: str, credentials: dict, prompt_tokens: int, completion_tokens: int
|
582 |
+
) -> LLMUsage:
|
583 |
+
"""
|
584 |
+
Calculate response usage
|
585 |
+
|
586 |
+
:param model: model name
|
587 |
+
:param credentials: model credentials
|
588 |
+
:param prompt_tokens: prompt tokens
|
589 |
+
:param completion_tokens: completion tokens
|
590 |
+
:return: usage
|
591 |
+
"""
|
592 |
+
# get prompt price info
|
593 |
+
prompt_price_info = self.get_price(
|
594 |
+
model=model,
|
595 |
+
credentials=credentials,
|
596 |
+
price_type=PriceType.INPUT,
|
597 |
+
tokens=prompt_tokens,
|
598 |
+
)
|
599 |
+
|
600 |
+
# get completion price info
|
601 |
+
completion_price_info = self.get_price(
|
602 |
+
model=model, credentials=credentials, price_type=PriceType.OUTPUT, tokens=completion_tokens
|
603 |
+
)
|
604 |
+
|
605 |
+
# transform usage
|
606 |
+
usage = LLMUsage(
|
607 |
+
            prompt_tokens=prompt_tokens,
            prompt_unit_price=prompt_price_info.unit_price,
            prompt_price_unit=prompt_price_info.unit,
            prompt_price=prompt_price_info.total_amount,
            completion_tokens=completion_tokens,
            completion_unit_price=completion_price_info.unit_price,
            completion_price_unit=completion_price_info.unit,
            completion_price=completion_price_info.total_amount,
            total_tokens=prompt_tokens + completion_tokens,
            total_price=prompt_price_info.total_amount + completion_price_info.total_amount,
            currency=prompt_price_info.currency,
            latency=time.perf_counter() - self.started_at,
        )

        return usage

    def _trigger_before_invoke_callbacks(
        self,
        model: str,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        model_parameters: dict,
        tools: Optional[list[PromptMessageTool]] = None,
        stop: Optional[Sequence[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
        callbacks: Optional[list[Callback]] = None,
    ) -> None:
        """
        Trigger before invoke callbacks

        :param model: model name
        :param credentials: model credentials
        :param prompt_messages: prompt messages
        :param model_parameters: model parameters
        :param tools: tools for tool calling
        :param stop: stop words
        :param stream: is stream response
        :param user: unique user id
        :param callbacks: callbacks
        """
        if callbacks:
            for callback in callbacks:
                try:
                    callback.on_before_invoke(
                        llm_instance=self,
                        model=model,
                        credentials=credentials,
                        prompt_messages=prompt_messages,
                        model_parameters=model_parameters,
                        tools=tools,
                        stop=stop,
                        stream=stream,
                        user=user,
                    )
                except Exception as e:
                    if callback.raise_error:
                        raise e
                    else:
                        logger.warning(f"Callback {callback.__class__.__name__} on_before_invoke failed with error {e}")

    def _trigger_new_chunk_callbacks(
        self,
        chunk: LLMResultChunk,
        model: str,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        model_parameters: dict,
        tools: Optional[list[PromptMessageTool]] = None,
        stop: Optional[Sequence[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
        callbacks: Optional[list[Callback]] = None,
    ) -> None:
        """
        Trigger new chunk callbacks

        :param chunk: chunk
        :param model: model name
        :param credentials: model credentials
        :param prompt_messages: prompt messages
        :param model_parameters: model parameters
        :param tools: tools for tool calling
        :param stop: stop words
        :param stream: is stream response
        :param user: unique user id
        """
        if callbacks:
            for callback in callbacks:
                try:
                    callback.on_new_chunk(
                        llm_instance=self,
                        chunk=chunk,
                        model=model,
                        credentials=credentials,
                        prompt_messages=prompt_messages,
                        model_parameters=model_parameters,
                        tools=tools,
                        stop=stop,
                        stream=stream,
                        user=user,
                    )
                except Exception as e:
                    if callback.raise_error:
                        raise e
                    else:
                        logger.warning(f"Callback {callback.__class__.__name__} on_new_chunk failed with error {e}")

    def _trigger_after_invoke_callbacks(
        self,
        model: str,
        result: LLMResult,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        model_parameters: dict,
        tools: Optional[list[PromptMessageTool]] = None,
        stop: Optional[Sequence[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
        callbacks: Optional[list[Callback]] = None,
    ) -> None:
        """
        Trigger after invoke callbacks

        :param model: model name
        :param result: result
        :param credentials: model credentials
        :param prompt_messages: prompt messages
        :param model_parameters: model parameters
        :param tools: tools for tool calling
        :param stop: stop words
        :param stream: is stream response
        :param user: unique user id
        :param callbacks: callbacks
        """
        if callbacks:
            for callback in callbacks:
                try:
                    callback.on_after_invoke(
                        llm_instance=self,
                        result=result,
                        model=model,
                        credentials=credentials,
                        prompt_messages=prompt_messages,
                        model_parameters=model_parameters,
                        tools=tools,
                        stop=stop,
                        stream=stream,
                        user=user,
                    )
                except Exception as e:
                    if callback.raise_error:
                        raise e
                    else:
                        logger.warning(f"Callback {callback.__class__.__name__} on_after_invoke failed with error {e}")

    def _trigger_invoke_error_callbacks(
        self,
        model: str,
        ex: Exception,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        model_parameters: dict,
        tools: Optional[list[PromptMessageTool]] = None,
        stop: Optional[Sequence[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
        callbacks: Optional[list[Callback]] = None,
    ) -> None:
        """
        Trigger invoke error callbacks

        :param model: model name
        :param ex: exception
        :param credentials: model credentials
        :param prompt_messages: prompt messages
        :param model_parameters: model parameters
        :param tools: tools for tool calling
        :param stop: stop words
        :param stream: is stream response
        :param user: unique user id
        :param callbacks: callbacks
        """
        if callbacks:
            for callback in callbacks:
                try:
                    callback.on_invoke_error(
                        llm_instance=self,
                        ex=ex,
                        model=model,
                        credentials=credentials,
                        prompt_messages=prompt_messages,
                        model_parameters=model_parameters,
                        tools=tools,
                        stop=stop,
                        stream=stream,
                        user=user,
                    )
                except Exception as e:
                    if callback.raise_error:
                        raise e
                    else:
                        logger.warning(f"Callback {callback.__class__.__name__} on_invoke_error failed with error {e}")

    def _validate_and_filter_model_parameters(self, model: str, model_parameters: dict, credentials: dict) -> dict:
        """
        Validate model parameters

        :param model: model name
        :param model_parameters: model parameters
        :param credentials: model credentials
        :return:
        """
        parameter_rules = self.get_parameter_rules(model, credentials)

        # validate model parameters
        filtered_model_parameters = {}
        for parameter_rule in parameter_rules:
            parameter_name = parameter_rule.name
            parameter_value = model_parameters.get(parameter_name)
            if parameter_value is None:
                if parameter_rule.use_template and parameter_rule.use_template in model_parameters:
                    # if parameter value is None, use template value variable name instead
                    parameter_value = model_parameters[parameter_rule.use_template]
                else:
                    if parameter_rule.required:
                        if parameter_rule.default is not None:
                            filtered_model_parameters[parameter_name] = parameter_rule.default
                            continue
                        else:
                            raise ValueError(f"Model Parameter {parameter_name} is required.")
                    else:
                        continue

            # validate parameter value type
            if parameter_rule.type == ParameterType.INT:
                if not isinstance(parameter_value, int):
                    raise ValueError(f"Model Parameter {parameter_name} should be int.")

                # validate parameter value range
                if parameter_rule.min is not None and parameter_value < parameter_rule.min:
                    raise ValueError(
                        f"Model Parameter {parameter_name} should be greater than or equal to {parameter_rule.min}."
                    )

                if parameter_rule.max is not None and parameter_value > parameter_rule.max:
                    raise ValueError(
                        f"Model Parameter {parameter_name} should be less than or equal to {parameter_rule.max}."
                    )
            elif parameter_rule.type == ParameterType.FLOAT:
                if not isinstance(parameter_value, float | int):
                    raise ValueError(f"Model Parameter {parameter_name} should be float.")

                # validate parameter value precision
                if parameter_rule.precision is not None:
                    if parameter_rule.precision == 0:
                        if parameter_value != int(parameter_value):
                            raise ValueError(f"Model Parameter {parameter_name} should be int.")
                    else:
                        if parameter_value != round(parameter_value, parameter_rule.precision):
                            raise ValueError(
                                f"Model Parameter {parameter_name} should be round to {parameter_rule.precision}"
                                f" decimal places."
                            )

                # validate parameter value range
                if parameter_rule.min is not None and parameter_value < parameter_rule.min:
                    raise ValueError(
                        f"Model Parameter {parameter_name} should be greater than or equal to {parameter_rule.min}."
                    )

                if parameter_rule.max is not None and parameter_value > parameter_rule.max:
                    raise ValueError(
                        f"Model Parameter {parameter_name} should be less than or equal to {parameter_rule.max}."
                    )
            elif parameter_rule.type == ParameterType.BOOLEAN:
                if not isinstance(parameter_value, bool):
                    raise ValueError(f"Model Parameter {parameter_name} should be bool.")
            elif parameter_rule.type == ParameterType.STRING:
                if not isinstance(parameter_value, str):
                    raise ValueError(f"Model Parameter {parameter_name} should be string.")

                # validate options
                if parameter_rule.options and parameter_value not in parameter_rule.options:
                    raise ValueError(f"Model Parameter {parameter_name} should be one of {parameter_rule.options}.")
            elif parameter_rule.type == ParameterType.TEXT:
                if not isinstance(parameter_value, str):
                    raise ValueError(f"Model Parameter {parameter_name} should be text.")

                # validate options
                if parameter_rule.options and parameter_value not in parameter_rule.options:
                    raise ValueError(f"Model Parameter {parameter_name} should be one of {parameter_rule.options}.")
            else:
                raise ValueError(f"Model Parameter {parameter_name} type {parameter_rule.type} is not supported.")

            filtered_model_parameters[parameter_name] = parameter_value

        return filtered_model_parameters
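The _trigger_*_callbacks hooks above all follow the same pattern: iterate the registered callbacks, call the matching handler, and either re-raise or only log depending on callback.raise_error. As a rough illustration (not part of this diff, and omitting any other members the Callback base class may require), a logging callback driven by these hooks could look like the sketch below; the handler names and keyword arguments mirror exactly what the hooks pass.

# Illustrative sketch only: a minimal Callback that the _trigger_* hooks above would drive.
from core.model_runtime.callbacks.base_callback import Callback


class PrintingCallback(Callback):
    raise_error = False  # let the hooks log failures instead of aborting the invocation

    def on_before_invoke(self, llm_instance, model, credentials, prompt_messages,
                         model_parameters, tools=None, stop=None, stream=True, user=None):
        print(f"invoking {model} with {len(prompt_messages)} prompt messages")

    def on_new_chunk(self, llm_instance, chunk, model, credentials, prompt_messages,
                     model_parameters, tools=None, stop=None, stream=True, user=None):
        print("received a streamed chunk")

    def on_after_invoke(self, llm_instance, result, model, credentials, prompt_messages,
                        model_parameters, tools=None, stop=None, stream=True, user=None):
        print(f"finished; usage: {result.usage}")

    def on_invoke_error(self, llm_instance, ex, model, credentials, prompt_messages,
                        model_parameters, tools=None, stop=None, stream=True, user=None):
        print(f"invocation failed: {ex}")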
api/core/model_runtime/model_providers/__base/model_provider.py
ADDED
@@ -0,0 +1,121 @@
import os
from abc import ABC, abstractmethod
from typing import Optional

from core.helper.module_import_helper import get_subclasses_from_module, import_module_from_source
from core.model_runtime.entities.model_entities import AIModelEntity, ModelType
from core.model_runtime.entities.provider_entities import ProviderEntity
from core.model_runtime.model_providers.__base.ai_model import AIModel
from core.tools.utils.yaml_utils import load_yaml_file


class ModelProvider(ABC):
    provider_schema: Optional[ProviderEntity] = None
    model_instance_map: dict[str, AIModel] = {}

    @abstractmethod
    def validate_provider_credentials(self, credentials: dict) -> None:
        """
        Validate provider credentials
        You can choose any validate_credentials method of model type or implement validate method by yourself,
        such as: get model list api

        if validate failed, raise exception

        :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
        """
        raise NotImplementedError

    def get_provider_schema(self) -> ProviderEntity:
        """
        Get provider schema

        :return: provider schema
        """
        if self.provider_schema:
            return self.provider_schema

        # get dirname of the current path
        provider_name = self.__class__.__module__.split(".")[-1]

        # get the path of the model_provider classes
        base_path = os.path.abspath(__file__)
        current_path = os.path.join(os.path.dirname(os.path.dirname(base_path)), provider_name)

        # read provider schema from yaml file
        yaml_path = os.path.join(current_path, f"{provider_name}.yaml")
        yaml_data = load_yaml_file(yaml_path)

        try:
            # yaml_data to entity
            provider_schema = ProviderEntity(**yaml_data)
        except Exception as e:
            raise Exception(f"Invalid provider schema for {provider_name}: {str(e)}")

        # cache schema
        self.provider_schema = provider_schema

        return provider_schema

    def models(self, model_type: ModelType) -> list[AIModelEntity]:
        """
        Get all models for given model type

        :param model_type: model type defined in `ModelType`
        :return: list of models
        """
        provider_schema = self.get_provider_schema()
        if model_type not in provider_schema.supported_model_types:
            return []

        # get model instance of the model type
        model_instance = self.get_model_instance(model_type)

        # get predefined models (predefined_models)
        models = model_instance.predefined_models()

        # return models
        return models

    def get_model_instance(self, model_type: ModelType) -> AIModel:
        """
        Get model instance

        :param model_type: model type defined in `ModelType`
        :return:
        """
        # get dirname of the current path
        provider_name = self.__class__.__module__.split(".")[-1]

        if f"{provider_name}.{model_type.value}" in self.model_instance_map:
            return self.model_instance_map[f"{provider_name}.{model_type.value}"]

        # get the path of the model type classes
        base_path = os.path.abspath(__file__)
        model_type_name = model_type.value.replace("-", "_")
        model_type_path = os.path.join(os.path.dirname(os.path.dirname(base_path)), provider_name, model_type_name)
        model_type_py_path = os.path.join(model_type_path, f"{model_type_name}.py")

        if not os.path.isdir(model_type_path) or not os.path.exists(model_type_py_path):
            raise Exception(f"Invalid model type {model_type} for provider {provider_name}")

        # Dynamic loading {model_type_name}.py file and find the subclass of AIModel
        parent_module = ".".join(self.__class__.__module__.split(".")[:-1])
        mod = import_module_from_source(
            module_name=f"{parent_module}.{model_type_name}.{model_type_name}", py_file_path=model_type_py_path
        )
        # FIXME "type" has no attribute "__abstractmethods__" ignore it for now fix it later
        model_class = next(
            filter(
                lambda x: x.__module__ == mod.__name__ and not x.__abstractmethods__,  # type: ignore
                get_subclasses_from_module(mod, AIModel),
            ),
            None,
        )
        if not model_class:
            raise Exception(f"Missing AIModel Class for model type {model_type} in {model_type_py_path}")

        model_instance_map = model_class()
        self.model_instance_map[f"{provider_name}.{model_type.value}"] = model_instance_map

        return model_instance_map
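A rough usage sketch (not part of the diff): given a concrete provider package laid out as this base class expects, that is a <provider>.yaml schema next to one module per model type, the base class resolves and caches model instances as shown below. The Anthropic provider is used only as an example, and the attribute names on the returned entities are assumptions based on the entities imported above.

# Illustrative only: how a caller might use ModelProvider.get_provider_schema / get_model_instance / models.
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.model_providers.anthropic.anthropic import AnthropicProvider

provider = AnthropicProvider()
schema = provider.get_provider_schema()           # parsed from anthropic/anthropic.yaml and cached on the class
llm = provider.get_model_instance(ModelType.LLM)  # dynamically imports anthropic/llm/llm.py and caches the instance
print(schema.provider, [m.model for m in provider.models(ModelType.LLM)])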
api/core/model_runtime/model_providers/__base/moderation_model.py
ADDED
@@ -0,0 +1,49 @@
import time
from abc import abstractmethod
from typing import Optional

from pydantic import ConfigDict

from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.model_providers.__base.ai_model import AIModel


class ModerationModel(AIModel):
    """
    Model class for moderation model.
    """

    model_type: ModelType = ModelType.MODERATION

    # pydantic configs
    model_config = ConfigDict(protected_namespaces=())

    def invoke(self, model: str, credentials: dict, text: str, user: Optional[str] = None) -> bool:
        """
        Invoke moderation model

        :param model: model name
        :param credentials: model credentials
        :param text: text to moderate
        :param user: unique user id
        :return: false if text is safe, true otherwise
        """
        self.started_at = time.perf_counter()

        try:
            return self._invoke(model, credentials, text, user)
        except Exception as e:
            raise self._transform_invoke_error(e)

    @abstractmethod
    def _invoke(self, model: str, credentials: dict, text: str, user: Optional[str] = None) -> bool:
        """
        Invoke moderation model

        :param model: model name
        :param credentials: model credentials
        :param text: text to moderate
        :param user: unique user id
        :return: false if text is safe, true otherwise
        """
        raise NotImplementedError
api/core/model_runtime/model_providers/__base/rerank_model.py
ADDED
@@ -0,0 +1,69 @@
import time
from abc import abstractmethod
from typing import Optional

from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.entities.rerank_entities import RerankResult
from core.model_runtime.model_providers.__base.ai_model import AIModel


class RerankModel(AIModel):
    """
    Base Model class for rerank model.
    """

    model_type: ModelType = ModelType.RERANK

    def invoke(
        self,
        model: str,
        credentials: dict,
        query: str,
        docs: list[str],
        score_threshold: Optional[float] = None,
        top_n: Optional[int] = None,
        user: Optional[str] = None,
    ) -> RerankResult:
        """
        Invoke rerank model

        :param model: model name
        :param credentials: model credentials
        :param query: search query
        :param docs: docs for reranking
        :param score_threshold: score threshold
        :param top_n: top n
        :param user: unique user id
        :return: rerank result
        """
        self.started_at = time.perf_counter()

        try:
            return self._invoke(model, credentials, query, docs, score_threshold, top_n, user)
        except Exception as e:
            raise self._transform_invoke_error(e)

    @abstractmethod
    def _invoke(
        self,
        model: str,
        credentials: dict,
        query: str,
        docs: list[str],
        score_threshold: Optional[float] = None,
        top_n: Optional[int] = None,
        user: Optional[str] = None,
    ) -> RerankResult:
        """
        Invoke rerank model

        :param model: model name
        :param credentials: model credentials
        :param query: search query
        :param docs: docs for reranking
        :param score_threshold: score threshold
        :param top_n: top n
        :param user: unique user id
        :return: rerank result
        """
        raise NotImplementedError
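As a sketch of how a provider plugs into this base class, the hypothetical subclass below scores each document, filters by score_threshold and keeps top_n. It is not from the diff: the scoring logic is made up, the RerankDocument fields are assumed from the rerank_entities module imported above, and the other abstract members of AIModel are omitted for brevity.

# Hypothetical subclass, for illustration only.
from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
from core.model_runtime.model_providers.__base.rerank_model import RerankModel


class KeywordOverlapRerankModel(RerankModel):
    def _invoke(self, model, credentials, query, docs, score_threshold=None, top_n=None, user=None):
        query_terms = set(query.lower().split())
        scored = []
        for index, doc in enumerate(docs):
            overlap = len(query_terms & set(doc.lower().split()))
            score = overlap / (len(query_terms) or 1)
            if score_threshold is None or score >= score_threshold:
                scored.append(RerankDocument(index=index, text=doc, score=score))
        scored.sort(key=lambda d: d.score, reverse=True)
        return RerankResult(model=model, docs=scored[:top_n] if top_n else scored)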
api/core/model_runtime/model_providers/__base/speech2text_model.py
ADDED
@@ -0,0 +1,59 @@
import os
from abc import abstractmethod
from typing import IO, Optional

from pydantic import ConfigDict

from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.model_providers.__base.ai_model import AIModel


class Speech2TextModel(AIModel):
    """
    Model class for speech2text model.
    """

    model_type: ModelType = ModelType.SPEECH2TEXT

    # pydantic configs
    model_config = ConfigDict(protected_namespaces=())

    def invoke(self, model: str, credentials: dict, file: IO[bytes], user: Optional[str] = None) -> str:
        """
        Invoke speech2text model

        :param model: model name
        :param credentials: model credentials
        :param file: audio file
        :param user: unique user id
        :return: text for given audio file
        """
        try:
            return self._invoke(model, credentials, file, user)
        except Exception as e:
            raise self._transform_invoke_error(e)

    @abstractmethod
    def _invoke(self, model: str, credentials: dict, file: IO[bytes], user: Optional[str] = None) -> str:
        """
        Invoke speech2text model

        :param model: model name
        :param credentials: model credentials
        :param file: audio file
        :param user: unique user id
        :return: text for given audio file
        """
        raise NotImplementedError

    def _get_demo_file_path(self) -> str:
        """
        Get demo file for given model

        :return: demo file
        """
        # Get the directory of the current file
        current_dir = os.path.dirname(os.path.abspath(__file__))

        # Construct the path to the audio file
        return os.path.join(current_dir, "audio.mp3")
api/core/model_runtime/model_providers/__base/text2img_model.py
ADDED
@@ -0,0 +1,54 @@
from abc import abstractmethod
from typing import IO, Optional

from pydantic import ConfigDict

from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.model_providers.__base.ai_model import AIModel


class Text2ImageModel(AIModel):
    """
    Model class for text2img model.
    """

    model_type: ModelType = ModelType.TEXT2IMG

    # pydantic configs
    model_config = ConfigDict(protected_namespaces=())

    def invoke(
        self, model: str, credentials: dict, prompt: str, model_parameters: dict, user: Optional[str] = None
    ) -> list[IO[bytes]]:
        """
        Invoke Text2Image model

        :param model: model name
        :param credentials: model credentials
        :param prompt: prompt for image generation
        :param model_parameters: model parameters
        :param user: unique user id

        :return: image bytes
        """
        try:
            return self._invoke(model, credentials, prompt, model_parameters, user)
        except Exception as e:
            raise self._transform_invoke_error(e)

    @abstractmethod
    def _invoke(
        self, model: str, credentials: dict, prompt: str, model_parameters: dict, user: Optional[str] = None
    ) -> list[IO[bytes]]:
        """
        Invoke Text2Image model

        :param model: model name
        :param credentials: model credentials
        :param prompt: prompt for image generation
        :param model_parameters: model parameters
        :param user: unique user id

        :return: image bytes
        """
        raise NotImplementedError
api/core/model_runtime/model_providers/__base/text_embedding_model.py
ADDED
@@ -0,0 +1,111 @@
import time
from abc import abstractmethod
from typing import Optional

from pydantic import ConfigDict

from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.__base.ai_model import AIModel


class TextEmbeddingModel(AIModel):
    """
    Model class for text embedding model.
    """

    model_type: ModelType = ModelType.TEXT_EMBEDDING

    # pydantic configs
    model_config = ConfigDict(protected_namespaces=())

    def invoke(
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        self.started_at = time.perf_counter()

        try:
            return self._invoke(model, credentials, texts, user, input_type)
        except Exception as e:
            raise self._transform_invoke_error(e)

    @abstractmethod
    def _invoke(
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        raise NotImplementedError

    @abstractmethod
    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
        """
        Get number of tokens for given prompt messages

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :return:
        """
        raise NotImplementedError

    def _get_context_size(self, model: str, credentials: dict) -> int:
        """
        Get context size for given embedding model

        :param model: model name
        :param credentials: model credentials
        :return: context size
        """
        model_schema = self.get_model_schema(model, credentials)

        if model_schema and ModelPropertyKey.CONTEXT_SIZE in model_schema.model_properties:
            content_size: int = model_schema.model_properties[ModelPropertyKey.CONTEXT_SIZE]
            return content_size

        return 1000

    def _get_max_chunks(self, model: str, credentials: dict) -> int:
        """
        Get max chunks for given embedding model

        :param model: model name
        :param credentials: model credentials
        :return: max chunks
        """
        model_schema = self.get_model_schema(model, credentials)

        if model_schema and ModelPropertyKey.MAX_CHUNKS in model_schema.model_properties:
            max_chunks: int = model_schema.model_properties[ModelPropertyKey.MAX_CHUNKS]
            return max_chunks

        return 1
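The _get_max_chunks helper above is what callers would use to batch texts before invoking the model. A minimal sketch, assuming `embedding_model`, `model` and `credentials` come from a concrete provider (none of these objects are defined in this diff):

# Illustrative only: batching texts with the helpers defined above before calling invoke().
max_chunks = embedding_model._get_max_chunks(model, credentials)
texts = ["first document", "second document", "third document"]
results = []
for i in range(0, len(texts), max_chunks):
    batch = texts[i : i + max_chunks]
    results.append(embedding_model.invoke(model=model, credentials=credentials, texts=batch))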
api/core/model_runtime/model_providers/__base/tokenizers/gpt2/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
api/core/model_runtime/model_providers/__base/tokenizers/gpt2/special_tokens_map.json
ADDED
@@ -0,0 +1,23 @@
{
  "bos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}
api/core/model_runtime/model_providers/__base/tokenizers/gpt2/tokenizer_config.json
ADDED
@@ -0,0 +1,33 @@
{
  "add_bos_token": false,
  "add_prefix_space": false,
  "bos_token": {
    "__type": "AddedToken",
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "clean_up_tokenization_spaces": true,
  "eos_token": {
    "__type": "AddedToken",
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "errors": "replace",
  "model_max_length": 1024,
  "pad_token": null,
  "tokenizer_class": "GPT2Tokenizer",
  "unk_token": {
    "__type": "AddedToken",
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}
api/core/model_runtime/model_providers/__base/tokenizers/gpt2/vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py
ADDED
@@ -0,0 +1,51 @@
import logging
from threading import Lock
from typing import Any

logger = logging.getLogger(__name__)

_tokenizer: Any = None
_lock = Lock()


class GPT2Tokenizer:
    @staticmethod
    def _get_num_tokens_by_gpt2(text: str) -> int:
        """
        use gpt2 tokenizer to get num tokens
        """
        _tokenizer = GPT2Tokenizer.get_encoder()
        tokens = _tokenizer.encode(text)
        return len(tokens)

    @staticmethod
    def get_num_tokens(text: str) -> int:
        # Because this process needs more cpu resource, we turn this back before we find a better way to handle it.
        #
        # future = _executor.submit(GPT2Tokenizer._get_num_tokens_by_gpt2, text)
        # result = future.result()
        # return cast(int, result)
        return GPT2Tokenizer._get_num_tokens_by_gpt2(text)

    @staticmethod
    def get_encoder() -> Any:
        global _tokenizer, _lock
        with _lock:
            if _tokenizer is None:
                # Try to use tiktoken to get the tokenizer because it is faster
                #
                try:
                    import tiktoken

                    _tokenizer = tiktoken.get_encoding("gpt2")
                except Exception:
                    from os.path import abspath, dirname, join

                    from transformers import GPT2Tokenizer as TransformerGPT2Tokenizer  # type: ignore

                    base_path = abspath(__file__)
                    gpt2_tokenizer_path = join(dirname(base_path), "gpt2")
                    _tokenizer = TransformerGPT2Tokenizer.from_pretrained(gpt2_tokenizer_path)
                    logger.info("Fallback to Transformers' GPT-2 tokenizer from tiktoken")

        return _tokenizer
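Usage is a single static call; the encoder is resolved lazily under a lock and cached module-wide, so repeated calls do not reload anything. A minimal sketch:

# Illustrative only: tiktoken is used if available, otherwise the bundled GPT-2 files are loaded via transformers.
from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer

num_tokens = GPT2Tokenizer.get_num_tokens("Hello, world!")
print(num_tokens)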
api/core/model_runtime/model_providers/__base/tts_model.py
ADDED
@@ -0,0 +1,179 @@
import logging
import re
from abc import abstractmethod
from collections.abc import Iterable
from typing import Any, Optional

from pydantic import ConfigDict

from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
from core.model_runtime.model_providers.__base.ai_model import AIModel

logger = logging.getLogger(__name__)


class TTSModel(AIModel):
    """
    Model class for TTS model.
    """

    model_type: ModelType = ModelType.TTS

    # pydantic configs
    model_config = ConfigDict(protected_namespaces=())

    def invoke(
        self,
        model: str,
        tenant_id: str,
        credentials: dict,
        content_text: str,
        voice: str,
        user: Optional[str] = None,
    ) -> Iterable[bytes]:
        """
        Invoke TTS model

        :param model: model name
        :param tenant_id: user tenant id
        :param credentials: model credentials
        :param voice: model timbre
        :param content_text: text content to be translated
        :param user: unique user id
        :return: translated audio file
        """
        try:
            return self._invoke(
                model=model,
                credentials=credentials,
                user=user,
                content_text=content_text,
                voice=voice,
                tenant_id=tenant_id,
            )
        except Exception as e:
            raise self._transform_invoke_error(e)

    @abstractmethod
    def _invoke(
        self,
        model: str,
        tenant_id: str,
        credentials: dict,
        content_text: str,
        voice: str,
        user: Optional[str] = None,
    ) -> Iterable[bytes]:
        """
        Invoke TTS model

        :param model: model name
        :param tenant_id: user tenant id
        :param credentials: model credentials
        :param voice: model timbre
        :param content_text: text content to be translated
        :param user: unique user id
        :return: translated audio file
        """
        raise NotImplementedError

    def get_tts_model_voices(self, model: str, credentials: dict, language: Optional[str] = None) -> list:
        """
        Retrieves the list of voices supported by a given text-to-speech (TTS) model.

        :param language: The language for which the voices are requested.
        :param model: The name of the TTS model.
        :param credentials: The credentials required to access the TTS model.
        :return: A list of voices supported by the TTS model.
        """
        model_schema = self.get_model_schema(model, credentials)

        if not model_schema or ModelPropertyKey.VOICES not in model_schema.model_properties:
            raise ValueError("this model does not support voice")

        voices = model_schema.model_properties[ModelPropertyKey.VOICES]
        if language:
            return [
                {"name": d["name"], "value": d["mode"]} for d in voices if language and language in d.get("language")
            ]
        else:
            return [{"name": d["name"], "value": d["mode"]} for d in voices]

    def _get_model_default_voice(self, model: str, credentials: dict) -> Any:
        """
        Get default voice for given tts model

        :param model: model name
        :param credentials: model credentials
        :return: voice
        """
        model_schema = self.get_model_schema(model, credentials)

        if model_schema and ModelPropertyKey.DEFAULT_VOICE in model_schema.model_properties:
            return model_schema.model_properties[ModelPropertyKey.DEFAULT_VOICE]

    def _get_model_audio_type(self, model: str, credentials: dict) -> str:
        """
        Get audio type for given tts model

        :param model: model name
        :param credentials: model credentials
        :return: audio type
        """
        model_schema = self.get_model_schema(model, credentials)

        if not model_schema or ModelPropertyKey.AUDIO_TYPE not in model_schema.model_properties:
            raise ValueError("this model does not support audio type")

        audio_type: str = model_schema.model_properties[ModelPropertyKey.AUDIO_TYPE]
        return audio_type

    def _get_model_word_limit(self, model: str, credentials: dict) -> int:
        """
        Get word limit for given tts model
        :return: word limit
        """
        model_schema = self.get_model_schema(model, credentials)

        if not model_schema or ModelPropertyKey.WORD_LIMIT not in model_schema.model_properties:
            raise ValueError("this model does not support word limit")
        word_limit: int = model_schema.model_properties[ModelPropertyKey.WORD_LIMIT]

        return word_limit

    def _get_model_workers_limit(self, model: str, credentials: dict) -> int:
        """
        Get max workers for given tts model
        :return: max workers
        """
        model_schema = self.get_model_schema(model, credentials)

        if not model_schema or ModelPropertyKey.MAX_WORKERS not in model_schema.model_properties:
            raise ValueError("this model does not support max workers")
        workers_limit: int = model_schema.model_properties[ModelPropertyKey.MAX_WORKERS]

        return workers_limit

    @staticmethod
    def _split_text_into_sentences(org_text, max_length=2000, pattern=r"[。.!?]"):
        match = re.compile(pattern)
        tx = match.finditer(org_text)
        start = 0
        result = []
        one_sentence = ""
        for i in tx:
            end = i.regs[0][1]
            tmp = org_text[start:end]
            if len(one_sentence + tmp) > max_length:
                result.append(one_sentence)
                one_sentence = ""
            one_sentence += tmp
            start = end
        last_sens = org_text[start:]
        if last_sens:
            one_sentence += last_sens
        if one_sentence != "":
            result.append(one_sentence)
        return result
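The _split_text_into_sentences helper groups sentence-ending matches into chunks no longer than max_length characters, which is how long input would typically be cut up before synthesis. A minimal sketch of calling it directly (the sample text and max_length are arbitrary):

# Illustrative only: sentences are grouped so each chunk stays within max_length characters.
chunks = TTSModel._split_text_into_sentences("第一句。Second sentence. Third one!", max_length=20)
print(chunks)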
api/core/model_runtime/model_providers/__init__.py
ADDED
@@ -0,0 +1,3 @@
from core.model_runtime.model_providers.model_provider_factory import ModelProviderFactory

model_provider_factory = ModelProviderFactory()
api/core/model_runtime/model_providers/_position.yaml
ADDED
@@ -0,0 +1,43 @@
- openai
- deepseek
- anthropic
- azure_openai
- google
- vertex_ai
- nvidia
- nvidia_nim
- cohere
- upstage
- bedrock
- togetherai
- openrouter
- ollama
- mistralai
- groq
- replicate
- huggingface_hub
- xinference
- triton_inference_server
- zhipuai
- baichuan
- spark
- minimax
- tongyi
- wenxin
- moonshot
- tencent
- jina
- chatglm
- yi
- openllm
- localai
- volcengine_maas
- openai_api_compatible
- hunyuan
- siliconflow
- perfxcloud
- zhinao
- fireworks
- mixedbread
- nomic
- voyage
api/core/model_runtime/model_providers/anthropic/__init__.py
ADDED
File without changes
api/core/model_runtime/model_providers/anthropic/_assets/icon_l_en.svg
ADDED
api/core/model_runtime/model_providers/anthropic/_assets/icon_s_en.svg
ADDED
api/core/model_runtime/model_providers/anthropic/anthropic.py
ADDED
@@ -0,0 +1,28 @@
import logging

from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.model_provider import ModelProvider

logger = logging.getLogger(__name__)


class AnthropicProvider(ModelProvider):
    def validate_provider_credentials(self, credentials: dict) -> None:
        """
        Validate provider credentials

        if validate failed, raise exception

        :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
        """
        try:
            model_instance = self.get_model_instance(ModelType.LLM)

            # Use `claude-3-opus-20240229` model for validate,
            model_instance.validate_credentials(model="claude-3-opus-20240229", credentials=credentials)
        except CredentialsValidateFailedError as ex:
            raise ex
        except Exception as ex:
            logger.exception(f"{self.get_provider_schema().provider} credentials validate failed")
            raise ex
api/core/model_runtime/model_providers/anthropic/anthropic.yaml
ADDED
@@ -0,0 +1,39 @@
provider: anthropic
label:
  en_US: Anthropic
description:
  en_US: Anthropic’s powerful models, such as Claude 3.
  zh_Hans: Anthropic 的强大模型,例如 Claude 3。
icon_small:
  en_US: icon_s_en.svg
icon_large:
  en_US: icon_l_en.svg
background: "#F0F0EB"
help:
  title:
    en_US: Get your API Key from Anthropic
    zh_Hans: 从 Anthropic 获取 API Key
  url:
    en_US: https://console.anthropic.com/account/keys
supported_model_types:
  - llm
configurate_methods:
  - predefined-model
provider_credential_schema:
  credential_form_schemas:
    - variable: anthropic_api_key
      label:
        en_US: API Key
      type: secret-input
      required: true
      placeholder:
        zh_Hans: 在此输入您的 API Key
        en_US: Enter your API Key
    - variable: anthropic_api_url
      label:
        en_US: API URL
      type: text-input
      required: false
      placeholder:
        zh_Hans: 在此输入您的 API URL
        en_US: Enter your API URL
|
api/core/model_runtime/model_providers/anthropic/llm/__init__.py
ADDED
File without changes
|
api/core/model_runtime/model_providers/anthropic/llm/_position.yaml
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
- claude-3-5-haiku-20241022
|
2 |
+
- claude-3-5-sonnet-20241022
|
3 |
+
- claude-3-5-sonnet-20240620
|
4 |
+
- claude-3-haiku-20240307
|
5 |
+
- claude-3-opus-20240229
|
6 |
+
- claude-3-sonnet-20240229
|
7 |
+
- claude-2.1
|
8 |
+
- claude-instant-1.2
|
9 |
+
- claude-2
|
10 |
+
- claude-instant-1
|
api/core/model_runtime/model_providers/anthropic/llm/claude-2.1.yaml
ADDED
@@ -0,0 +1,36 @@
model: claude-2.1
label:
  en_US: claude-2.1
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 200000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 4096
    min: 1
    max: 4096
  - name: response_format
    use_template: response_format
pricing:
  input: '8.00'
  output: '24.00'
  unit: '0.000001'
  currency: USD
api/core/model_runtime/model_providers/anthropic/llm/claude-2.yaml
ADDED
@@ -0,0 +1,37 @@
model: claude-2
label:
  en_US: claude-2
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 100000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 4096
    min: 1
    max: 4096
  - name: response_format
    use_template: response_format
pricing:
  input: '8.00'
  output: '24.00'
  unit: '0.000001'
  currency: USD
deprecated: true
api/core/model_runtime/model_providers/anthropic/llm/claude-3-5-haiku-20241022.yaml
ADDED
@@ -0,0 +1,38 @@
model: claude-3-5-haiku-20241022
label:
  en_US: claude-3-5-haiku-20241022
model_type: llm
features:
  - agent-thought
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 200000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
pricing:
  input: '1.00'
  output: '5.00'
  unit: '0.000001'
  currency: USD
api/core/model_runtime/model_providers/anthropic/llm/claude-3-5-sonnet-20240620.yaml
ADDED
@@ -0,0 +1,40 @@
model: claude-3-5-sonnet-20240620
label:
  en_US: claude-3-5-sonnet-20240620
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
  - document
model_properties:
  mode: chat
  context_size: 200000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
pricing:
  input: '3.00'
  output: '15.00'
  unit: '0.000001'
  currency: USD
api/core/model_runtime/model_providers/anthropic/llm/claude-3-5-sonnet-20241022.yaml
ADDED
@@ -0,0 +1,40 @@
model: claude-3-5-sonnet-20241022
label:
  en_US: claude-3-5-sonnet-20241022
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
  - document
model_properties:
  mode: chat
  context_size: 200000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 8192
    min: 1
    max: 8192
  - name: response_format
    use_template: response_format
pricing:
  input: '3.00'
  output: '15.00'
  unit: '0.000001'
  currency: USD
api/core/model_runtime/model_providers/anthropic/llm/claude-3-haiku-20240307.yaml
ADDED
@@ -0,0 +1,39 @@
model: claude-3-haiku-20240307
label:
  en_US: claude-3-haiku-20240307
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 200000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 4096
    min: 1
    max: 4096
  - name: response_format
    use_template: response_format
pricing:
  input: '0.25'
  output: '1.25'
  unit: '0.000001'
  currency: USD
api/core/model_runtime/model_providers/anthropic/llm/claude-3-opus-20240229.yaml
ADDED
@@ -0,0 +1,39 @@
model: claude-3-opus-20240229
label:
  en_US: claude-3-opus-20240229
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 200000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 4096
    min: 1
    max: 4096
  - name: response_format
    use_template: response_format
pricing:
  input: '15.00'
  output: '75.00'
  unit: '0.000001'
  currency: USD
api/core/model_runtime/model_providers/anthropic/llm/claude-3-sonnet-20240229.yaml
ADDED
@@ -0,0 +1,39 @@
model: claude-3-sonnet-20240229
label:
  en_US: claude-3-sonnet-20240229
model_type: llm
features:
  - agent-thought
  - vision
  - tool-call
  - stream-tool-call
model_properties:
  mode: chat
  context_size: 200000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 4096
    min: 1
    max: 4096
  - name: response_format
    use_template: response_format
pricing:
  input: '3.00'
  output: '15.00'
  unit: '0.000001'
  currency: USD
api/core/model_runtime/model_providers/anthropic/llm/claude-instant-1.2.yaml
ADDED
@@ -0,0 +1,36 @@
model: claude-instant-1.2
label:
  en_US: claude-instant-1.2
model_type: llm
features: [ ]
model_properties:
  mode: chat
  context_size: 100000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens
    use_template: max_tokens
    required: true
    default: 4096
    min: 1
    max: 4096
  - name: response_format
    use_template: response_format
pricing:
  input: '1.63'
  output: '5.51'
  unit: '0.000001'
  currency: USD
deprecated: true
api/core/model_runtime/model_providers/anthropic/llm/claude-instant-1.yaml
ADDED
@@ -0,0 +1,36 @@
model: claude-instant-1
label:
  en_US: claude-instant-1
model_type: llm
features: [ ]
model_properties:
  mode: chat
  context_size: 100000
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: top_k
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
      en_US: Only sample from the top K options for each subsequent token.
    required: false
  - name: max_tokens_to_sample
    use_template: max_tokens
    required: true
    default: 4096
    min: 1
    max: 4096
  - name: response_format
    use_template: response_format
pricing:
  input: '1.63'
  output: '5.51'
  unit: '0.000001'
  currency: USD
deprecated: true
api/core/model_runtime/model_providers/anthropic/llm/llm.py
ADDED
@@ -0,0 +1,654 @@
import base64
import json
from collections.abc import Generator, Sequence
from typing import Optional, Union, cast

import anthropic
import requests
from anthropic import Anthropic, Stream
from anthropic.types import (
    ContentBlockDeltaEvent,
    Message,
    MessageDeltaEvent,
    MessageStartEvent,
    MessageStopEvent,
    MessageStreamEvent,
    completion_create_params,
)
from anthropic.types.beta.tools import ToolsBetaMessage
from httpx import Timeout

from core.model_runtime.callbacks.base_callback import Callback
from core.model_runtime.entities import (
    AssistantPromptMessage,
    DocumentPromptMessageContent,
    ImagePromptMessageContent,
    PromptMessage,
    PromptMessageContentType,
    PromptMessageTool,
    SystemPromptMessage,
    TextPromptMessageContent,
    ToolPromptMessage,
    UserPromptMessage,
)
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
    InvokeBadRequestError,
    InvokeConnectionError,
    InvokeError,
    InvokeRateLimitError,
    InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel

ANTHROPIC_BLOCK_MODE_PROMPT = """You should always follow the instructions and output a valid {{block}} object.
The structure of the {{block}} object you can found in the instructions, use {"answer": "$your_answer"} as the default structure
if you are not sure about the structure.

<instructions>
{{instructions}}
</instructions>
"""  # noqa: E501


class AnthropicLargeLanguageModel(LargeLanguageModel):
    def _invoke(
        self,
        model: str,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        model_parameters: dict,
        tools: Optional[list[PromptMessageTool]] = None,
        stop: Optional[list[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
    ) -> Union[LLMResult, Generator]:
        """
        Invoke large language model

        :param model: model name
        :param credentials: model credentials
        :param prompt_messages: prompt messages
        :param model_parameters: model parameters
        :param tools: tools for tool calling
        :param stop: stop words
        :param stream: is stream response
        :param user: unique user id
        :return: full response or stream response chunk generator result
        """
        # invoke model
        return self._chat_generate(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)

    def _chat_generate(
        self,
        model: str,
        credentials: dict,
        prompt_messages: Sequence[PromptMessage],
        model_parameters: dict,
        tools: Optional[list[PromptMessageTool]] = None,
        stop: Optional[Sequence[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
    ) -> Union[LLMResult, Generator]:
        """
        Invoke llm chat model

        :param model: model name
        :param credentials: credentials
        :param prompt_messages: prompt messages
        :param model_parameters: model parameters
        :param stop: stop words
        :param stream: is stream response
        :param user: unique user id
        :return: full response or stream response chunk generator result
        """
        # transform credentials to kwargs for model instance
        credentials_kwargs = self._to_credential_kwargs(credentials)

        # transform model parameters from completion api of anthropic to chat api
        if "max_tokens_to_sample" in model_parameters:
            model_parameters["max_tokens"] = model_parameters.pop("max_tokens_to_sample")

        # init model client
        client = Anthropic(**credentials_kwargs)

        extra_model_kwargs = {}
        if stop:
            extra_model_kwargs["stop_sequences"] = stop

        if user:
            extra_model_kwargs["metadata"] = completion_create_params.Metadata(user_id=user)

        system, prompt_message_dicts = self._convert_prompt_messages(prompt_messages)

        if system:
            extra_model_kwargs["system"] = system

        # Add the new header for claude-3-5-sonnet-20240620 model
        extra_headers = {}
        if model == "claude-3-5-sonnet-20240620":
            if model_parameters.get("max_tokens", 0) > 4096:
                extra_headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15"

        if any(
            isinstance(content, DocumentPromptMessageContent)
            for prompt_message in prompt_messages
            if isinstance(prompt_message.content, list)
            for content in prompt_message.content
        ):
            extra_headers["anthropic-beta"] = "pdfs-2024-09-25"

        if tools:
            extra_model_kwargs["tools"] = [self._transform_tool_prompt(tool) for tool in tools]
            response = client.beta.tools.messages.create(
                model=model,
                messages=prompt_message_dicts,
                stream=stream,
                extra_headers=extra_headers,
                **model_parameters,
                **extra_model_kwargs,
            )
        else:
            # chat model
            response = client.messages.create(
                model=model,
                messages=prompt_message_dicts,
                stream=stream,
                extra_headers=extra_headers,
                **model_parameters,
                **extra_model_kwargs,
            )

        if stream:
            return self._handle_chat_generate_stream_response(model, credentials, response, prompt_messages)

        return self._handle_chat_generate_response(model, credentials, response, prompt_messages)

    def _code_block_mode_wrapper(
        self,
        model: str,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        model_parameters: dict,
        tools: Optional[list[PromptMessageTool]] = None,
        stop: Optional[list[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
        callbacks: Optional[list[Callback]] = None,
    ) -> Union[LLMResult, Generator]:
        """
        Code block mode wrapper for invoking large language model
        """
        if model_parameters.get("response_format"):
            stop = stop or []
            # chat model
            self._transform_chat_json_prompts(
                model=model,
                credentials=credentials,
                prompt_messages=prompt_messages,
                model_parameters=model_parameters,
                tools=tools,
                stop=stop,
                stream=stream,
                user=user,
                response_format=model_parameters["response_format"],
            )
            model_parameters.pop("response_format")

        return self._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)

    def _transform_tool_prompt(self, tool: PromptMessageTool) -> dict:
        return {"name": tool.name, "description": tool.description, "input_schema": tool.parameters}

    def _transform_chat_json_prompts(
        self,
        model: str,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        model_parameters: dict,
        tools: list[PromptMessageTool] | None = None,
        stop: list[str] | None = None,
        stream: bool = True,
        user: str | None = None,
        response_format: str = "JSON",
    ) -> None:
        """
        Transform json prompts
        """
        if "```\n" not in stop:
            stop.append("```\n")
        if "\n```" not in stop:
            stop.append("\n```")

        # check if there is a system message
        if len(prompt_messages) > 0 and isinstance(prompt_messages[0], SystemPromptMessage):
            # override the system message
            prompt_messages[0] = SystemPromptMessage(
                content=ANTHROPIC_BLOCK_MODE_PROMPT.replace("{{instructions}}", prompt_messages[0].content).replace(
                    "{{block}}", response_format
                )
            )
            prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}"))
        else:
            # insert the system message
            prompt_messages.insert(
                0,
                SystemPromptMessage(
                    content=ANTHROPIC_BLOCK_MODE_PROMPT.replace(
                        "{{instructions}}", f"Please output a valid {response_format} object."
                    ).replace("{{block}}", response_format)
                ),
            )
            prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}"))

    def get_num_tokens(
        self,
        model: str,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        tools: Optional[list[PromptMessageTool]] = None,
    ) -> int:
        """
        Get number of tokens for given prompt messages

        :param model: model name
        :param credentials: model credentials
        :param prompt_messages: prompt messages
        :param tools: tools for tool calling
        :return:
        """
        prompt = self._convert_messages_to_prompt_anthropic(prompt_messages)

        client = Anthropic(api_key="")
        tokens = client.count_tokens(prompt)

        tool_call_inner_prompts_tokens_map = {
            "claude-3-opus-20240229": 395,
            "claude-3-haiku-20240307": 264,
            "claude-3-sonnet-20240229": 159,
        }

        if model in tool_call_inner_prompts_tokens_map and tools:
            tokens += tool_call_inner_prompts_tokens_map[model]

        return tokens

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials

        :param model: model name
        :param credentials: model credentials
        :return:
        """
        try:
            self._chat_generate(
                model=model,
                credentials=credentials,
                prompt_messages=[
                    UserPromptMessage(content="ping"),
                ],
                model_parameters={
                    "temperature": 0,
                    "max_tokens": 20,
                },
                stream=False,
            )
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex))

    def _handle_chat_generate_response(
        self,
        model: str,
        credentials: dict,
        response: Union[Message, ToolsBetaMessage],
        prompt_messages: list[PromptMessage],
    ) -> LLMResult:
        """
        Handle llm chat response

        :param model: model name
        :param credentials: credentials
        :param response: response
        :param prompt_messages: prompt messages
        :return: llm response
        """
        # transform assistant message to prompt message
        assistant_prompt_message = AssistantPromptMessage(content="", tool_calls=[])

        for content in response.content:
            if content.type == "text":
                assistant_prompt_message.content += content.text
            elif content.type == "tool_use":
                tool_call = AssistantPromptMessage.ToolCall(
                    id=content.id,
                    type="function",
                    function=AssistantPromptMessage.ToolCall.ToolCallFunction(
                        name=content.name, arguments=json.dumps(content.input)
                    ),
                )
                assistant_prompt_message.tool_calls.append(tool_call)

        # calculate num tokens
        prompt_tokens = (response.usage and response.usage.input_tokens) or self.get_num_tokens(
            model, credentials, prompt_messages
        )

        completion_tokens = (response.usage and response.usage.output_tokens) or self.get_num_tokens(
            model, credentials, [assistant_prompt_message]
        )

        # transform usage
        usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)

        # transform response
        response = LLMResult(
            model=response.model, prompt_messages=prompt_messages, message=assistant_prompt_message, usage=usage
        )

        return response

    def _handle_chat_generate_stream_response(
        self, model: str, credentials: dict, response: Stream[MessageStreamEvent], prompt_messages: list[PromptMessage]
    ) -> Generator:
        """
        Handle llm chat stream response

        :param model: model name
        :param response: response
        :param prompt_messages: prompt messages
        :return: llm response chunk generator
        """
        full_assistant_content = ""
        return_model = None
        input_tokens = 0
        output_tokens = 0
        finish_reason = None
        index = 0

        tool_calls: list[AssistantPromptMessage.ToolCall] = []

        for chunk in response:
            if isinstance(chunk, MessageStartEvent):
                if hasattr(chunk, "content_block"):
                    content_block = chunk.content_block
                    if isinstance(content_block, dict):
                        if content_block.get("type") == "tool_use":
                            tool_call = AssistantPromptMessage.ToolCall(
                                id=content_block.get("id"),
                                type="function",
                                function=AssistantPromptMessage.ToolCall.ToolCallFunction(
                                    name=content_block.get("name"), arguments=""
                                ),
                            )
                            tool_calls.append(tool_call)
                elif hasattr(chunk, "delta"):
                    delta = chunk.delta
                    if isinstance(delta, dict) and len(tool_calls) > 0:
                        if delta.get("type") == "input_json_delta":
                            tool_calls[-1].function.arguments += delta.get("partial_json", "")
                elif chunk.message:
                    return_model = chunk.message.model
                    input_tokens = chunk.message.usage.input_tokens
            elif isinstance(chunk, MessageDeltaEvent):
                output_tokens = chunk.usage.output_tokens
                finish_reason = chunk.delta.stop_reason
            elif isinstance(chunk, MessageStopEvent):
                # transform usage
                usage = self._calc_response_usage(model, credentials, input_tokens, output_tokens)

                # transform empty tool call arguments to {}
                for tool_call in tool_calls:
                    if not tool_call.function.arguments:
                        tool_call.function.arguments = "{}"

                yield LLMResultChunk(
                    model=return_model,
                    prompt_messages=prompt_messages,
                    delta=LLMResultChunkDelta(
                        index=index + 1,
                        message=AssistantPromptMessage(content="", tool_calls=tool_calls),
                        finish_reason=finish_reason,
                        usage=usage,
                    ),
                )
            elif isinstance(chunk, ContentBlockDeltaEvent):
                chunk_text = chunk.delta.text or ""
                full_assistant_content += chunk_text

                # transform assistant message to prompt message
                assistant_prompt_message = AssistantPromptMessage(content=chunk_text)

                index = chunk.index

                yield LLMResultChunk(
                    model=return_model,
                    prompt_messages=prompt_messages,
                    delta=LLMResultChunkDelta(
                        index=chunk.index,
                        message=assistant_prompt_message,
                    ),
                )

    def _to_credential_kwargs(self, credentials: dict) -> dict:
        """
        Transform credentials to kwargs for model instance

        :param credentials:
        :return:
        """
        credentials_kwargs = {
            "api_key": credentials["anthropic_api_key"],
            "timeout": Timeout(315.0, read=300.0, write=10.0, connect=5.0),
            "max_retries": 1,
        }

        if credentials.get("anthropic_api_url"):
            credentials["anthropic_api_url"] = credentials["anthropic_api_url"].rstrip("/")
            credentials_kwargs["base_url"] = credentials["anthropic_api_url"]

        return credentials_kwargs

    def _convert_prompt_messages(self, prompt_messages: Sequence[PromptMessage]) -> tuple[str, list[dict]]:
        """
        Convert prompt messages to dict list and system
        """
        system = ""
        first_loop = True
        for message in prompt_messages:
            if isinstance(message, SystemPromptMessage):
                if isinstance(message.content, str):
                    message.content = message.content.strip()
                elif isinstance(message.content, list):
                    # System prompt only support text
                    message.content = "".join(
                        c.data.strip() for c in message.content if isinstance(c, TextPromptMessageContent)
                    )
                else:
                    raise ValueError(f"Unknown system prompt message content type {type(message.content)}")
                if first_loop:
                    system = message.content
                    first_loop = False
                else:
                    system += "\n"
                    system += message.content

        prompt_message_dicts = []
        for message in prompt_messages:
            if not isinstance(message, SystemPromptMessage):
                if isinstance(message, UserPromptMessage):
                    message = cast(UserPromptMessage, message)
                    if isinstance(message.content, str):
                        # handle empty user prompt see #10013 #10520
                        # responses, ignore user prompts containing only whitespace, the Claude API can't handle it.
                        if not message.content.strip():
                            continue
                        message_dict = {"role": "user", "content": message.content}
                        prompt_message_dicts.append(message_dict)
                    else:
                        sub_messages = []
                        for message_content in message.content:
                            if message_content.type == PromptMessageContentType.TEXT:
                                message_content = cast(TextPromptMessageContent, message_content)
                                sub_message_dict = {"type": "text", "text": message_content.data}
                                sub_messages.append(sub_message_dict)
                            elif message_content.type == PromptMessageContentType.IMAGE:
                                message_content = cast(ImagePromptMessageContent, message_content)
                                if not message_content.base64_data:
                                    # fetch image data from url
                                    try:
                                        image_content = requests.get(message_content.url).content
                                        base64_data = base64.b64encode(image_content).decode("utf-8")
                                    except Exception as ex:
                                        raise ValueError(
                                            f"Failed to fetch image data from url {message_content.data}, {ex}"
                                        )
                                else:
                                    base64_data = message_content.base64_data

                                mime_type = message_content.mime_type
                                if mime_type not in {"image/jpeg", "image/png", "image/gif", "image/webp"}:
                                    raise ValueError(
                                        f"Unsupported image type {mime_type}, "
                                        f"only support image/jpeg, image/png, image/gif, and image/webp"
                                    )

                                sub_message_dict = {
                                    "type": "image",
                                    "source": {"type": "base64", "media_type": mime_type, "data": base64_data},
                                }
                                sub_messages.append(sub_message_dict)
                            elif isinstance(message_content, DocumentPromptMessageContent):
                                if message_content.mime_type != "application/pdf":
                                    raise ValueError(
                                        f"Unsupported document type {message_content.mime_type}, "
                                        "only support application/pdf"
                                    )
                                sub_message_dict = {
                                    "type": "document",
                                    "source": {
                                        "type": "base64",
                                        "media_type": message_content.mime_type,
                                        "data": message_content.base64_data,
                                    },
                                }
                                sub_messages.append(sub_message_dict)
                        prompt_message_dicts.append({"role": "user", "content": sub_messages})
                elif isinstance(message, AssistantPromptMessage):
                    message = cast(AssistantPromptMessage, message)
                    content = []
                    if message.tool_calls:
                        for tool_call in message.tool_calls:
                            content.append(
                                {
                                    "type": "tool_use",
                                    "id": tool_call.id,
                                    "name": tool_call.function.name,
                                    "input": json.loads(tool_call.function.arguments),
                                }
                            )
                    if message.content:
                        content.append({"type": "text", "text": message.content})

                    if prompt_message_dicts[-1]["role"] == "assistant":
                        prompt_message_dicts[-1]["content"].extend(content)
                    else:
                        prompt_message_dicts.append({"role": "assistant", "content": content})
                elif isinstance(message, ToolPromptMessage):
                    message = cast(ToolPromptMessage, message)
                    message_dict = {
                        "role": "user",
                        "content": [
                            {"type": "tool_result", "tool_use_id": message.tool_call_id, "content": message.content}
                        ],
                    }
                    prompt_message_dicts.append(message_dict)
                else:
                    raise ValueError(f"Got unknown type {message}")

        return system, prompt_message_dicts

    def _convert_one_message_to_text(self, message: PromptMessage) -> str:
        """
        Convert a single message to a string.

        :param message: PromptMessage to convert.
        :return: String representation of the message.
        """
        human_prompt = "\n\nHuman:"
        ai_prompt = "\n\nAssistant:"
        content = message.content

        if isinstance(message, UserPromptMessage):
            message_text = f"{human_prompt} {content}"
            if not isinstance(message.content, list):
                message_text = f"{ai_prompt} {content}"
            else:
                message_text = ""
                for sub_message in message.content:
                    if sub_message.type == PromptMessageContentType.TEXT:
                        message_text += f"{human_prompt} {sub_message.data}"
                    elif sub_message.type == PromptMessageContentType.IMAGE:
                        message_text += f"{human_prompt} [IMAGE]"
        elif isinstance(message, AssistantPromptMessage):
            if not isinstance(message.content, list):
                message_text = f"{ai_prompt} {content}"
            else:
                message_text = ""
                for sub_message in message.content:
                    if sub_message.type == PromptMessageContentType.TEXT:
                        message_text += f"{ai_prompt} {sub_message.data}"
                    elif sub_message.type == PromptMessageContentType.IMAGE:
                        message_text += f"{ai_prompt} [IMAGE]"
        elif isinstance(message, SystemPromptMessage):
            message_text = content
        elif isinstance(message, ToolPromptMessage):
            message_text = f"{human_prompt} {message.content}"
        else:
            raise ValueError(f"Got unknown type {message}")

        return message_text

    def _convert_messages_to_prompt_anthropic(self, messages: list[PromptMessage]) -> str:
        """
        Format a list of messages into a full prompt for the Anthropic model

        :param messages: List of PromptMessage to combine.
        :return: Combined string with necessary human_prompt and ai_prompt tags.
        """
        if not messages:
            return ""

        messages = messages.copy()  # don't mutate the original list
        if not isinstance(messages[-1], AssistantPromptMessage):
            messages.append(AssistantPromptMessage(content=""))

        text = "".join(self._convert_one_message_to_text(message) for message in messages)

        # trim off the trailing ' ' that might come from the "Assistant: "
        return text.rstrip()

    @property
    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
        """
        Map model invoke error to unified error
        The key is the error type thrown to the caller
        The value is the error type thrown by the model,
        which needs to be converted into a unified error type for the caller.

        :return: Invoke error mapping
        """
        return {
            InvokeConnectionError: [anthropic.APIConnectionError, anthropic.APITimeoutError],
            InvokeServerUnavailableError: [anthropic.InternalServerError],
            InvokeRateLimitError: [anthropic.RateLimitError],
            InvokeAuthorizationError: [anthropic.AuthenticationError, anthropic.PermissionDeniedError],
            InvokeBadRequestError: [
                anthropic.BadRequestError,
                anthropic.NotFoundError,
                anthropic.UnprocessableEntityError,
                anthropic.APIError,
            ],
        }
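A rough usage sketch of the class above. It assumes the model class can be instantiated directly without constructor arguments and that the credentials dict carries `anthropic_api_key` (the key `_to_credential_kwargs` reads); in the full runtime the public `invoke` entry point on `LargeLanguageModel` would normally be used rather than `_invoke`:

# Illustrative only; direct instantiation and the model name are assumptions.
from core.model_runtime.entities import SystemPromptMessage, UserPromptMessage

llm = AnthropicLargeLanguageModel()
credentials = {"anthropic_api_key": "sk-ant-..."}  # optionally also "anthropic_api_url"

result = llm._invoke(
    model="claude-3-5-sonnet-20241022",
    credentials=credentials,
    prompt_messages=[
        SystemPromptMessage(content="You are a concise assistant."),
        UserPromptMessage(content="ping"),
    ],
    model_parameters={"temperature": 0, "max_tokens": 64},
    stream=False,  # returns an LLMResult; stream=True yields LLMResultChunk objects
)
print(result.message.content)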
api/core/model_runtime/model_providers/azure_ai_studio/__init__.py
ADDED
File without changes
api/core/model_runtime/model_providers/azure_ai_studio/_assets/icon_l_en.png
ADDED
api/core/model_runtime/model_providers/azure_ai_studio/_assets/icon_s_en.png
ADDED
api/core/model_runtime/model_providers/azure_ai_studio/azure_ai_studio.py
ADDED
@@ -0,0 +1,17 @@
import logging

from core.model_runtime.model_providers.__base.model_provider import ModelProvider

logger = logging.getLogger(__name__)


class AzureAIStudioProvider(ModelProvider):
    def validate_provider_credentials(self, credentials: dict) -> None:
        """
        Validate provider credentials

        if validate failed, raise exception

        :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
        """
        pass
api/core/model_runtime/model_providers/azure_ai_studio/azure_ai_studio.yaml
ADDED
@@ -0,0 +1,99 @@
provider: azure_ai_studio
label:
  zh_Hans: Azure AI Studio
  en_US: Azure AI Studio
icon_small:
  en_US: icon_s_en.png
icon_large:
  en_US: icon_l_en.png
description:
  en_US: Azure AI Studio
  zh_Hans: Azure AI Studio
background: "#93c5fd"
help:
  title:
    en_US: How to deploy customized model on Azure AI Studio
    zh_Hans: 如何在Azure AI Studio上的私有化部署的模型
  url:
    en_US: https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models
    zh_Hans: https://learn.microsoft.com/zh-cn/azure/ai-studio/how-to/deploy-models
supported_model_types:
  - llm
  - rerank
configurate_methods:
  - customizable-model
model_credential_schema:
  model:
    label:
      en_US: Model Name
      zh_Hans: 模型名称
    placeholder:
      en_US: Enter your model name
      zh_Hans: 输入模型名称
  credential_form_schemas:
    - variable: endpoint
      label:
        en_US: Azure AI Studio Endpoint
      type: text-input
      required: true
      placeholder:
        zh_Hans: 请输入你的Azure AI Studio推理端点
        en_US: 'Enter your API Endpoint, eg: https://example.com'
    - variable: api_key
      required: true
      label:
        en_US: API Key
        zh_Hans: API Key
      type: secret-input
      placeholder:
        en_US: Enter your Azure AI Studio API Key
        zh_Hans: 在此输入您的 Azure AI Studio API Key
      show_on:
        - variable: __model_type
          value: llm
    - variable: mode
      show_on:
        - variable: __model_type
          value: llm
      label:
        en_US: Completion mode
      type: select
      required: false
      default: chat
      placeholder:
        zh_Hans: 选择对话类型
        en_US: Select completion mode
      options:
        - value: completion
          label:
            en_US: Completion
            zh_Hans: 补全
        - value: chat
          label:
            en_US: Chat
            zh_Hans: 对话
    - variable: context_size
      label:
        zh_Hans: 模型上下文长度
        en_US: Model context size
      required: true
      show_on:
        - variable: __model_type
          value: llm
      type: text-input
      default: "4096"
      placeholder:
        zh_Hans: 在此输入您的模型上下文长度
        en_US: Enter your Model context size
    - variable: jwt_token
      required: true
      label:
        en_US: JWT Token
        zh_Hans: JWT令牌
      type: secret-input
      placeholder:
        en_US: Enter your Azure AI Studio JWT Token
        zh_Hans: 在此输入您的 Azure AI Studio 推理 API Key
      show_on:
        - variable: __model_type
          value: rerank
api/core/model_runtime/model_providers/azure_ai_studio/llm/__init__.py
ADDED
File without changes
api/core/model_runtime/model_providers/azure_ai_studio/llm/llm.py
ADDED
@@ -0,0 +1,345 @@
import logging
from collections.abc import Generator, Sequence
from typing import Any, Optional, Union

from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import StreamingChatCompletionsUpdate, SystemMessage, UserMessage
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import (
    ClientAuthenticationError,
    DecodeError,
    DeserializationError,
    HttpResponseError,
    ResourceExistsError,
    ResourceModifiedError,
    ResourceNotFoundError,
    ResourceNotModifiedError,
    SerializationError,
    ServiceRequestError,
    ServiceResponseError,
)

from core.model_runtime.callbacks.base_callback import Callback
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import (
    AssistantPromptMessage,
    PromptMessage,
    PromptMessageTool,
)
from core.model_runtime.entities.model_entities import (
    AIModelEntity,
    FetchFrom,
    I18nObject,
    ModelPropertyKey,
    ModelType,
    ParameterRule,
    ParameterType,
)
from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
    InvokeBadRequestError,
    InvokeConnectionError,
    InvokeError,
    InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel

logger = logging.getLogger(__name__)


class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
    """
    Model class for Azure AI Studio large language model.
    """

    client: Any = None

    from azure.ai.inference.models import StreamingChatCompletionsUpdate

    def _invoke(
        self,
        model: str,
        credentials: dict,
        prompt_messages: Sequence[PromptMessage],
        model_parameters: dict,
        tools: Optional[Sequence[PromptMessageTool]] = None,
        stop: Optional[Sequence[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
    ) -> Union[LLMResult, Generator]:
        """
        Invoke large language model

        :param model: model name
        :param credentials: model credentials
        :param prompt_messages: prompt messages
        :param model_parameters: model parameters
        :param tools: tools for tool calling
        :param stop: stop words
        :param stream: is stream response
        :param user: unique user id
        :return: full response or stream response chunk generator result
        """

        if not self.client:
            endpoint = str(credentials.get("endpoint"))
            api_key = str(credentials.get("api_key"))
            self.client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))

        messages = [{"role": msg.role.value, "content": msg.content} for msg in prompt_messages]

        payload = {
            "messages": messages,
            "max_tokens": model_parameters.get("max_tokens", 4096),
            "temperature": model_parameters.get("temperature", 0),
            "top_p": model_parameters.get("top_p", 1),
            "stream": stream,
            "model": model,
        }

        if stop:
            payload["stop"] = stop

        if tools:
            payload["tools"] = [tool.model_dump() for tool in tools]

        try:
            response = self.client.complete(**payload)

            if stream:
                return self._handle_stream_response(response, model, prompt_messages)
            else:
                return self._handle_non_stream_response(response, model, prompt_messages, credentials)
        except Exception as e:
            raise self._transform_invoke_error(e)

    def _handle_stream_response(self, response, model: str, prompt_messages: list[PromptMessage]) -> Generator:
        for chunk in response:
            if isinstance(chunk, StreamingChatCompletionsUpdate):
                if chunk.choices:
                    delta = chunk.choices[0].delta
                    if delta.content:
                        yield LLMResultChunk(
                            model=model,
                            prompt_messages=prompt_messages,
                            delta=LLMResultChunkDelta(
                                index=0,
                                message=AssistantPromptMessage(content=delta.content, tool_calls=[]),
                            ),
                        )

    def _handle_non_stream_response(
        self, response, model: str, prompt_messages: list[PromptMessage], credentials: dict
    ) -> LLMResult:
        assistant_text = response.choices[0].message.content
        assistant_prompt_message = AssistantPromptMessage(content=assistant_text)
        usage = self._calc_response_usage(
            model, credentials, response.usage.prompt_tokens, response.usage.completion_tokens
        )
        result = LLMResult(model=model, prompt_messages=prompt_messages, message=assistant_prompt_message, usage=usage)

        if hasattr(response, "system_fingerprint"):
            result.system_fingerprint = response.system_fingerprint

        return result

    def _invoke_result_generator(
        self,
        model: str,
        result: Generator,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        model_parameters: dict,
        tools: Optional[list[PromptMessageTool]] = None,
        stop: Optional[list[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
        callbacks: Optional[list[Callback]] = None,
    ) -> Generator:
        """
        Invoke result generator

        :param result: result generator
        :return: result generator
        """
        callbacks = callbacks or []
        prompt_message = AssistantPromptMessage(content="")
        usage = None
        system_fingerprint = None
        real_model = model

        try:
            for chunk in result:
                if isinstance(chunk, dict):
                    content = chunk["choices"][0]["message"]["content"]
                    usage = chunk["usage"]
                    chunk = LLMResultChunk(
                        model=model,
                        prompt_messages=prompt_messages,
                        delta=LLMResultChunkDelta(
                            index=0,
                            message=AssistantPromptMessage(content=content, tool_calls=[]),
                        ),
                        system_fingerprint=chunk.get("system_fingerprint"),
                    )

                yield chunk

                self._trigger_new_chunk_callbacks(
                    chunk=chunk,
                    model=model,
                    credentials=credentials,
                    prompt_messages=prompt_messages,
                    model_parameters=model_parameters,
                    tools=tools,
                    stop=stop,
                    stream=stream,
                    user=user,
                    callbacks=callbacks,
                )

                prompt_message.content += chunk.delta.message.content
                real_model = chunk.model
                if hasattr(chunk.delta, "usage"):
                    usage = chunk.delta.usage

                if chunk.system_fingerprint:
                    system_fingerprint = chunk.system_fingerprint
        except Exception as e:
            raise self._transform_invoke_error(e)

        self._trigger_after_invoke_callbacks(
            model=model,
            result=LLMResult(
                model=real_model,
                prompt_messages=prompt_messages,
                message=prompt_message,
                usage=usage or LLMUsage.empty_usage(),
                system_fingerprint=system_fingerprint,
            ),
            credentials=credentials,
            prompt_messages=prompt_messages,
            model_parameters=model_parameters,
            tools=tools,
            stop=stop,
            stream=stream,
            user=user,
            callbacks=callbacks,
        )

    def get_num_tokens(
        self,
        model: str,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        tools: Optional[list[PromptMessageTool]] = None,
    ) -> int:
        """
        Get number of tokens for given prompt messages

        :param model: model name
        :param credentials: model credentials
        :param prompt_messages: prompt messages
        :param tools: tools for tool calling
        :return:
        """
        # Implement token counting logic here
        # Might need to use a tokenizer specific to the Azure AI Studio model
        return 0

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials

        :param model: model name
        :param credentials: model credentials
        :return:
        """
        try:
            endpoint = str(credentials.get("endpoint"))
            api_key = str(credentials.get("api_key"))
            client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))
            client.complete(
                messages=[
                    SystemMessage(content="I say 'ping', you say 'pong'"),
                    UserMessage(content="ping"),
                ],
                model=model,
            )
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex))

    @property
    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
        """
        Map model invoke error to unified error
        The key is the error type thrown to the caller
        The value is the error type thrown by the model,
        which needs to be converted into a unified error type for the caller.

        :return: Invoke error mapping
        """
        return {
            InvokeConnectionError: [
                ServiceRequestError,
            ],
            InvokeServerUnavailableError: [
                ServiceResponseError,
            ],
            InvokeAuthorizationError: [
                ClientAuthenticationError,
            ],
            InvokeBadRequestError: [
                HttpResponseError,
                DecodeError,
                ResourceExistsError,
                ResourceNotFoundError,
                ResourceModifiedError,
                ResourceNotModifiedError,
                SerializationError,
                DeserializationError,
            ],
        }

    def get_customizable_model_schema(self, model: str, credentials: dict) -> Optional[AIModelEntity]:
        """
        Used to define customizable model schema
        """
        rules = [
            ParameterRule(
                name="temperature",
                type=ParameterType.FLOAT,
                use_template="temperature",
                label=I18nObject(zh_Hans="温度", en_US="Temperature"),
            ),
            ParameterRule(
                name="top_p",
                type=ParameterType.FLOAT,
                use_template="top_p",
                label=I18nObject(zh_Hans="Top P", en_US="Top P"),
            ),
            ParameterRule(
                name="max_tokens",
                type=ParameterType.INT,
                use_template="max_tokens",
                min=1,
                default=512,
                label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"),
            ),
        ]

        entity = AIModelEntity(
            model=model,
            label=I18nObject(en_US=model),
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_type=ModelType.LLM,
            features=[],
            model_properties={
                ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", "4096")),
                ModelPropertyKey.MODE: credentials.get("mode", LLMMode.CHAT),
            },
            parameter_rules=rules,
        )

        return entity
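A minimal standalone sketch of the connectivity check the class above performs in `validate_credentials`, using the same `azure.ai.inference` calls; the endpoint URL, API key, and model name are placeholders, not values from this commit:

from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import SystemMessage, UserMessage
from azure.core.credentials import AzureKeyCredential

# Placeholder values; in the provider these come from the model credential form above.
client = ChatCompletionsClient(
    endpoint="https://<your-deployment>.inference.ai.azure.com",
    credential=AzureKeyCredential("<api-key>"),
)

# Mirrors the ping/pong round trip used by validate_credentials.
response = client.complete(
    messages=[
        SystemMessage(content="I say 'ping', you say 'pong'"),
        UserMessage(content="ping"),
    ],
    model="<deployment-model-name>",
)
print(response.choices[0].message.content)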
api/core/model_runtime/model_providers/azure_ai_studio/rerank/__init__.py
ADDED
File without changes
api/core/model_runtime/model_providers/azure_ai_studio/rerank/rerank.py
ADDED
@@ -0,0 +1,164 @@
import json
import logging
import os
import ssl
import urllib.request
from typing import Optional

from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType
from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
    InvokeBadRequestError,
    InvokeConnectionError,
    InvokeError,
    InvokeRateLimitError,
    InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.rerank_model import RerankModel

logger = logging.getLogger(__name__)


class AzureRerankModel(RerankModel):
    """
    Model class for Azure AI Studio rerank model.
    """

    def _allow_self_signed_https(self, allowed):
        # bypass the server certificate verification on client side
        if allowed and not os.environ.get("PYTHONHTTPSVERIFY", "") and getattr(ssl, "_create_unverified_context", None):
            ssl._create_default_https_context = ssl._create_unverified_context

    def _azure_rerank(self, query_input: str, docs: list[str], endpoint: str, api_key: str):
        # self._allow_self_signed_https(True)  # Enable if using self-signed certificate

        data = {"inputs": query_input, "docs": docs}

        body = json.dumps(data).encode("utf-8")
        headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}

        req = urllib.request.Request(endpoint, body, headers)

        try:
            with urllib.request.urlopen(req) as response:
                result = response.read()
                return json.loads(result)
        except urllib.error.HTTPError as error:
            logger.exception(f"The request failed with status code: {error.code}")
            logger.exception(error.info())
            logger.exception(error.read().decode("utf8", "ignore"))
            raise

    def _invoke(
        self,
        model: str,
        credentials: dict,
        query: str,
        docs: list[str],
        score_threshold: Optional[float] = None,
        top_n: Optional[int] = None,
        user: Optional[str] = None,
    ) -> RerankResult:
        """
        Invoke rerank model

        :param model: model name
        :param credentials: model credentials
        :param query: search query
        :param docs: docs for reranking
        :param score_threshold: score threshold
        :param top_n: top n
        :param user: unique user id
        :return: rerank result
        """
        try:
            if len(docs) == 0:
                return RerankResult(model=model, docs=[])

            endpoint = credentials.get("endpoint")
            api_key = credentials.get("jwt_token")

            if not endpoint or not api_key:
                raise ValueError("Azure endpoint and API key must be provided in credentials")

            result = self._azure_rerank(query, docs, endpoint, api_key)
            logger.info(f"Azure rerank result: {result}")

            rerank_documents = []
            for idx, (doc, score_dict) in enumerate(zip(docs, result)):
                score = score_dict["score"]
                rerank_document = RerankDocument(index=idx, text=doc, score=score)

                if score_threshold is None or score >= score_threshold:
                    rerank_documents.append(rerank_document)

            rerank_documents.sort(key=lambda x: x.score, reverse=True)

            if top_n:
                rerank_documents = rerank_documents[:top_n]

            return RerankResult(model=model, docs=rerank_documents)

        except Exception as e:
            logger.exception(f"Failed to invoke rerank model, model: {model}")
            raise

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials

        :param model: model name
        :param credentials: model credentials
        :return:
        """
        try:
            self._invoke(
                model=model,
                credentials=credentials,
                query="What is the capital of the United States?",
                docs=[
                    "Carson City is the capital city of the American state of Nevada. At the 2010 United States "
                    "Census, Carson City had a population of 55,274.",
                    "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that "
                    "are a political division controlled by the United States. Its capital is Saipan.",
                ],
                score_threshold=0.8,
            )
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex))

    @property
    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
        """
        Map model invoke error to unified error
        The key is the error type thrown to the caller
        The value is the error type thrown by the model,
        which needs to be converted into a unified error type for the caller.

        :return: Invoke error mapping
        """
        return {
            InvokeConnectionError: [urllib.error.URLError],
            InvokeServerUnavailableError: [urllib.error.HTTPError],
            InvokeRateLimitError: [InvokeRateLimitError],
            InvokeAuthorizationError: [InvokeAuthorizationError],
            InvokeBadRequestError: [InvokeBadRequestError, KeyError, ValueError, json.JSONDecodeError],
        }

    def get_customizable_model_schema(self, model: str, credentials: dict) -> Optional[AIModelEntity]:
        """
        used to define customizable model schema
        """
        entity = AIModelEntity(
            model=model,
            label=I18nObject(en_US=model),
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_type=ModelType.RERANK,
            model_properties={},
            parameter_rules=[],
        )

        return entity
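For reference, `_azure_rerank` above posts a JSON body of the form {"inputs": <query>, "docs": [<doc>, ...]} with a Bearer token, and `_invoke` consumes the response as a list of objects carrying a `score` per document. A hedged sketch of calling such an endpoint directly; the endpoint URL, token, and the exact response shape are assumptions inferred from the code above:

import json
import urllib.request

endpoint = "https://<your-rerank-deployment>/score"  # placeholder
jwt_token = "<jwt-token>"  # placeholder

# Same payload shape _azure_rerank builds.
payload = {"inputs": "What is the capital of the United States?", "docs": ["doc one", "doc two"]}
req = urllib.request.Request(
    endpoint,
    json.dumps(payload).encode("utf-8"),
    {"Content-Type": "application/json", "Authorization": f"Bearer {jwt_token}"},
)
with urllib.request.urlopen(req) as resp:
    scores = json.loads(resp.read())  # expected (assumed): [{"score": 0.93}, {"score": 0.12}]
print(scores)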
api/core/model_runtime/model_providers/azure_openai/__init__.py
ADDED
File without changes