CatPtain committed on
Commit
e94c687
·
verified ·
1 Parent(s): cda795a

Upload 45 files

Files changed (46)
  1. .gitattributes +14 -0
  2. api/core/model_runtime/callbacks/__init__.py +0 -0
  3. api/core/model_runtime/callbacks/base_callback.py +152 -0
  4. api/core/model_runtime/callbacks/logging_callback.py +170 -0
  5. api/core/model_runtime/docs/en_US/customizable_model_scale_out.md +310 -0
  6. api/core/model_runtime/docs/en_US/images/index/image-1.png +3 -0
  7. api/core/model_runtime/docs/en_US/images/index/image-2.png +3 -0
  8. api/core/model_runtime/docs/en_US/images/index/image-20231210143654461.png +3 -0
  9. api/core/model_runtime/docs/en_US/images/index/image-20231210144229650.png +3 -0
  10. api/core/model_runtime/docs/en_US/images/index/image-20231210144814617.png +3 -0
  11. api/core/model_runtime/docs/en_US/images/index/image-20231210151548521.png +0 -0
  12. api/core/model_runtime/docs/en_US/images/index/image-20231210151628992.png +0 -0
  13. api/core/model_runtime/docs/en_US/images/index/image-20231210165243632.png +3 -0
  14. api/core/model_runtime/docs/en_US/images/index/image-3.png +0 -0
  15. api/core/model_runtime/docs/en_US/images/index/image.png +3 -0
  16. api/core/model_runtime/docs/en_US/interfaces.md +706 -0
  17. api/core/model_runtime/docs/en_US/predefined_model_scale_out.md +173 -0
  18. api/core/model_runtime/docs/en_US/provider_scale_out.md +265 -0
  19. api/core/model_runtime/docs/en_US/schema.md +206 -0
  20. api/core/model_runtime/docs/zh_Hans/customizable_model_scale_out.md +297 -0
  21. api/core/model_runtime/docs/zh_Hans/images/index/image-1.png +3 -0
  22. api/core/model_runtime/docs/zh_Hans/images/index/image-2.png +3 -0
  23. api/core/model_runtime/docs/zh_Hans/images/index/image-20231210143654461.png +3 -0
  24. api/core/model_runtime/docs/zh_Hans/images/index/image-20231210144229650.png +3 -0
  25. api/core/model_runtime/docs/zh_Hans/images/index/image-20231210144814617.png +3 -0
  26. api/core/model_runtime/docs/zh_Hans/images/index/image-20231210151548521.png +0 -0
  27. api/core/model_runtime/docs/zh_Hans/images/index/image-20231210151628992.png +0 -0
  28. api/core/model_runtime/docs/zh_Hans/images/index/image-20231210165243632.png +3 -0
  29. api/core/model_runtime/docs/zh_Hans/images/index/image-3.png +0 -0
  30. api/core/model_runtime/docs/zh_Hans/images/index/image.png +3 -0
  31. api/core/model_runtime/docs/zh_Hans/interfaces.md +746 -0
  32. api/core/model_runtime/docs/zh_Hans/predefined_model_scale_out.md +172 -0
  33. api/core/model_runtime/docs/zh_Hans/provider_scale_out.md +188 -0
  34. api/core/model_runtime/docs/zh_Hans/schema.md +208 -0
  35. api/core/model_runtime/entities/__init__.py +45 -0
  36. api/core/model_runtime/entities/common_entities.py +17 -0
  37. api/core/model_runtime/entities/defaults.py +130 -0
  38. api/core/model_runtime/entities/llm_entities.py +143 -0
  39. api/core/model_runtime/entities/message_entities.py +218 -0
  40. api/core/model_runtime/entities/model_entities.py +227 -0
  41. api/core/model_runtime/entities/provider_entities.py +159 -0
  42. api/core/model_runtime/entities/rerank_entities.py +20 -0
  43. api/core/model_runtime/entities/text_embedding_entities.py +29 -0
  44. api/core/model_runtime/errors/__init__.py +0 -0
  45. api/core/model_runtime/errors/invoke.py +43 -0
  46. api/core/model_runtime/errors/validate.py +6 -0
.gitattributes CHANGED
@@ -12,3 +12,17 @@ api/core/tools/docs/images/index/image.png filter=lfs diff=lfs merge=lfs -text
  api/core/tools/provider/builtin/comfyui/_assets/icon.png filter=lfs diff=lfs merge=lfs -text
  api/core/tools/provider/builtin/dalle/_assets/icon.png filter=lfs diff=lfs merge=lfs -text
  api/core/tools/provider/builtin/wecom/_assets/icon.png filter=lfs diff=lfs merge=lfs -text
+ api/core/model_runtime/docs/en_US/images/index/image-1.png filter=lfs diff=lfs merge=lfs -text
+ api/core/model_runtime/docs/en_US/images/index/image-2.png filter=lfs diff=lfs merge=lfs -text
+ api/core/model_runtime/docs/en_US/images/index/image-20231210143654461.png filter=lfs diff=lfs merge=lfs -text
+ api/core/model_runtime/docs/en_US/images/index/image-20231210144229650.png filter=lfs diff=lfs merge=lfs -text
+ api/core/model_runtime/docs/en_US/images/index/image-20231210144814617.png filter=lfs diff=lfs merge=lfs -text
+ api/core/model_runtime/docs/en_US/images/index/image-20231210165243632.png filter=lfs diff=lfs merge=lfs -text
+ api/core/model_runtime/docs/en_US/images/index/image.png filter=lfs diff=lfs merge=lfs -text
+ api/core/model_runtime/docs/zh_Hans/images/index/image-1.png filter=lfs diff=lfs merge=lfs -text
+ api/core/model_runtime/docs/zh_Hans/images/index/image-2.png filter=lfs diff=lfs merge=lfs -text
+ api/core/model_runtime/docs/zh_Hans/images/index/image-20231210143654461.png filter=lfs diff=lfs merge=lfs -text
+ api/core/model_runtime/docs/zh_Hans/images/index/image-20231210144229650.png filter=lfs diff=lfs merge=lfs -text
+ api/core/model_runtime/docs/zh_Hans/images/index/image-20231210144814617.png filter=lfs diff=lfs merge=lfs -text
+ api/core/model_runtime/docs/zh_Hans/images/index/image-20231210165243632.png filter=lfs diff=lfs merge=lfs -text
+ api/core/model_runtime/docs/zh_Hans/images/index/image.png filter=lfs diff=lfs merge=lfs -text
api/core/model_runtime/callbacks/__init__.py ADDED
File without changes
api/core/model_runtime/callbacks/base_callback.py ADDED
@@ -0,0 +1,152 @@
+ from abc import ABC, abstractmethod
+ from collections.abc import Sequence
+ from typing import Optional
+
+ from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
+ from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool
+ from core.model_runtime.model_providers.__base.ai_model import AIModel
+
+ _TEXT_COLOR_MAPPING = {
+     "blue": "36;1",
+     "yellow": "33;1",
+     "pink": "38;5;200",
+     "green": "32;1",
+     "red": "31;1",
+ }
+
+
+ class Callback(ABC):
+     """
+     Base class for callbacks.
+     Only for LLM.
+     """
+
+     raise_error: bool = False
+
+     @abstractmethod
+     def on_before_invoke(
+         self,
+         llm_instance: AIModel,
+         model: str,
+         credentials: dict,
+         prompt_messages: list[PromptMessage],
+         model_parameters: dict,
+         tools: Optional[list[PromptMessageTool]] = None,
+         stop: Optional[Sequence[str]] = None,
+         stream: bool = True,
+         user: Optional[str] = None,
+     ) -> None:
+         """
+         Before invoke callback
+
+         :param llm_instance: LLM instance
+         :param model: model name
+         :param credentials: model credentials
+         :param prompt_messages: prompt messages
+         :param model_parameters: model parameters
+         :param tools: tools for tool calling
+         :param stop: stop words
+         :param stream: is stream response
+         :param user: unique user id
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def on_new_chunk(
+         self,
+         llm_instance: AIModel,
+         chunk: LLMResultChunk,
+         model: str,
+         credentials: dict,
+         prompt_messages: list[PromptMessage],
+         model_parameters: dict,
+         tools: Optional[list[PromptMessageTool]] = None,
+         stop: Optional[Sequence[str]] = None,
+         stream: bool = True,
+         user: Optional[str] = None,
+     ):
+         """
+         On new chunk callback
+
+         :param llm_instance: LLM instance
+         :param chunk: chunk
+         :param model: model name
+         :param credentials: model credentials
+         :param prompt_messages: prompt messages
+         :param model_parameters: model parameters
+         :param tools: tools for tool calling
+         :param stop: stop words
+         :param stream: is stream response
+         :param user: unique user id
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def on_after_invoke(
+         self,
+         llm_instance: AIModel,
+         result: LLMResult,
+         model: str,
+         credentials: dict,
+         prompt_messages: list[PromptMessage],
+         model_parameters: dict,
+         tools: Optional[list[PromptMessageTool]] = None,
+         stop: Optional[Sequence[str]] = None,
+         stream: bool = True,
+         user: Optional[str] = None,
+     ) -> None:
+         """
+         After invoke callback
+
+         :param llm_instance: LLM instance
+         :param result: result
+         :param model: model name
+         :param credentials: model credentials
+         :param prompt_messages: prompt messages
+         :param model_parameters: model parameters
+         :param tools: tools for tool calling
+         :param stop: stop words
+         :param stream: is stream response
+         :param user: unique user id
+         """
+         raise NotImplementedError()
+
+     @abstractmethod
+     def on_invoke_error(
+         self,
+         llm_instance: AIModel,
+         ex: Exception,
+         model: str,
+         credentials: dict,
+         prompt_messages: list[PromptMessage],
+         model_parameters: dict,
+         tools: Optional[list[PromptMessageTool]] = None,
+         stop: Optional[Sequence[str]] = None,
+         stream: bool = True,
+         user: Optional[str] = None,
+     ) -> None:
+         """
+         Invoke error callback
+
+         :param llm_instance: LLM instance
+         :param ex: exception
+         :param model: model name
+         :param credentials: model credentials
+         :param prompt_messages: prompt messages
+         :param model_parameters: model parameters
+         :param tools: tools for tool calling
+         :param stop: stop words
+         :param stream: is stream response
+         :param user: unique user id
+         """
+         raise NotImplementedError()
+
+     def print_text(self, text: str, color: Optional[str] = None, end: str = "") -> None:
+         """Print text with highlighting and no end characters."""
+         text_to_print = self._get_colored_text(text, color) if color else text
+         print(text_to_print, end=end)
+
+     def _get_colored_text(self, text: str, color: str) -> str:
+         """Get colored text."""
+         color_str = _TEXT_COLOR_MAPPING[color]
+         return f"\u001b[{color_str}m\033[1;3m{text}\u001b[0m"
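A minimal concrete subclass only has to override the four abstract hooks above. A sketch (a hypothetical example, not part of this commit):

```python
from core.model_runtime.callbacks.base_callback import Callback


class ChunkCountingCallback(Callback):
    """Hypothetical callback that counts streamed chunks."""

    def __init__(self):
        self.chunks = 0

    def on_before_invoke(self, llm_instance, model, credentials, prompt_messages,
                         model_parameters, tools=None, stop=None, stream=True, user=None):
        self.chunks = 0  # reset the counter for each invocation

    def on_new_chunk(self, llm_instance, chunk, model, credentials, prompt_messages,
                     model_parameters, tools=None, stop=None, stream=True, user=None):
        self.chunks += 1  # one streamed LLMResultChunk received

    def on_after_invoke(self, llm_instance, result, model, credentials, prompt_messages,
                        model_parameters, tools=None, stop=None, stream=True, user=None):
        self.print_text(f"received {self.chunks} chunks\n", color="green")

    def on_invoke_error(self, llm_instance, ex, model, credentials, prompt_messages,
                        model_parameters, tools=None, stop=None, stream=True, user=None):
        self.print_text(f"invocation failed: {ex}\n", color="red")
```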
api/core/model_runtime/callbacks/logging_callback.py ADDED
@@ -0,0 +1,170 @@
+ import json
+ import logging
+ import sys
+ from collections.abc import Sequence
+ from typing import Optional, cast
+
+ from core.model_runtime.callbacks.base_callback import Callback
+ from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
+ from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool
+ from core.model_runtime.model_providers.__base.ai_model import AIModel
+
+ logger = logging.getLogger(__name__)
+
+
+ class LoggingCallback(Callback):
+     def on_before_invoke(
+         self,
+         llm_instance: AIModel,
+         model: str,
+         credentials: dict,
+         prompt_messages: list[PromptMessage],
+         model_parameters: dict,
+         tools: Optional[list[PromptMessageTool]] = None,
+         stop: Optional[Sequence[str]] = None,
+         stream: bool = True,
+         user: Optional[str] = None,
+     ) -> None:
+         """
+         Before invoke callback
+
+         :param llm_instance: LLM instance
+         :param model: model name
+         :param credentials: model credentials
+         :param prompt_messages: prompt messages
+         :param model_parameters: model parameters
+         :param tools: tools for tool calling
+         :param stop: stop words
+         :param stream: is stream response
+         :param user: unique user id
+         """
+         self.print_text("\n[on_llm_before_invoke]\n", color="blue")
+         self.print_text(f"Model: {model}\n", color="blue")
+         self.print_text("Parameters:\n", color="blue")
+         for key, value in model_parameters.items():
+             self.print_text(f"\t{key}: {value}\n", color="blue")
+
+         if stop:
+             self.print_text(f"\tstop: {stop}\n", color="blue")
+
+         if tools:
+             self.print_text("\tTools:\n", color="blue")
+             for tool in tools:
+                 self.print_text(f"\t\t{tool.name}\n", color="blue")
+
+         self.print_text(f"Stream: {stream}\n", color="blue")
+
+         if user:
+             self.print_text(f"User: {user}\n", color="blue")
+
+         self.print_text("Prompt messages:\n", color="blue")
+         for prompt_message in prompt_messages:
+             if prompt_message.name:
+                 self.print_text(f"\tname: {prompt_message.name}\n", color="blue")
+
+             self.print_text(f"\trole: {prompt_message.role.value}\n", color="blue")
+             self.print_text(f"\tcontent: {prompt_message.content}\n", color="blue")
+
+         if stream:
+             self.print_text("\n[on_llm_new_chunk]")
+
+     def on_new_chunk(
+         self,
+         llm_instance: AIModel,
+         chunk: LLMResultChunk,
+         model: str,
+         credentials: dict,
+         prompt_messages: list[PromptMessage],
+         model_parameters: dict,
+         tools: Optional[list[PromptMessageTool]] = None,
+         stop: Optional[Sequence[str]] = None,
+         stream: bool = True,
+         user: Optional[str] = None,
+     ):
+         """
+         On new chunk callback
+
+         :param llm_instance: LLM instance
+         :param chunk: chunk
+         :param model: model name
+         :param credentials: model credentials
+         :param prompt_messages: prompt messages
+         :param model_parameters: model parameters
+         :param tools: tools for tool calling
+         :param stop: stop words
+         :param stream: is stream response
+         :param user: unique user id
+         """
+         sys.stdout.write(cast(str, chunk.delta.message.content))
+         sys.stdout.flush()
+
+     def on_after_invoke(
+         self,
+         llm_instance: AIModel,
+         result: LLMResult,
+         model: str,
+         credentials: dict,
+         prompt_messages: list[PromptMessage],
+         model_parameters: dict,
+         tools: Optional[list[PromptMessageTool]] = None,
+         stop: Optional[Sequence[str]] = None,
+         stream: bool = True,
+         user: Optional[str] = None,
+     ) -> None:
+         """
+         After invoke callback
+
+         :param llm_instance: LLM instance
+         :param result: result
+         :param model: model name
+         :param credentials: model credentials
+         :param prompt_messages: prompt messages
+         :param model_parameters: model parameters
+         :param tools: tools for tool calling
+         :param stop: stop words
+         :param stream: is stream response
+         :param user: unique user id
+         """
+         self.print_text("\n[on_llm_after_invoke]\n", color="yellow")
+         self.print_text(f"Content: {result.message.content}\n", color="yellow")
+
+         if result.message.tool_calls:
+             self.print_text("Tool calls:\n", color="yellow")
+             for tool_call in result.message.tool_calls:
+                 self.print_text(f"\t{tool_call.id}\n", color="yellow")
+                 self.print_text(f"\t{tool_call.function.name}\n", color="yellow")
+                 self.print_text(f"\t{json.dumps(tool_call.function.arguments)}\n", color="yellow")
+
+         self.print_text(f"Model: {result.model}\n", color="yellow")
+         self.print_text(f"Usage: {result.usage}\n", color="yellow")
+         self.print_text(f"System Fingerprint: {result.system_fingerprint}\n", color="yellow")
+
+     def on_invoke_error(
+         self,
+         llm_instance: AIModel,
+         ex: Exception,
+         model: str,
+         credentials: dict,
+         prompt_messages: list[PromptMessage],
+         model_parameters: dict,
+         tools: Optional[list[PromptMessageTool]] = None,
+         stop: Optional[Sequence[str]] = None,
+         stream: bool = True,
+         user: Optional[str] = None,
+     ) -> None:
+         """
+         Invoke error callback
+
+         :param llm_instance: LLM instance
+         :param ex: exception
+         :param model: model name
+         :param credentials: model credentials
+         :param prompt_messages: prompt messages
+         :param model_parameters: model parameters
+         :param tools: tools for tool calling
+         :param stop: stop words
+         :param stream: is stream response
+         :param user: unique user id
+         """
+         self.print_text("\n[on_llm_invoke_error]\n", color="red")
+         logger.exception(ex)
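The inherited printing helper can be exercised directly as a quick smoke test (assuming the imports above resolve in your environment):

```python
callback = LoggingCallback()
callback.print_text("hello from the model runtime\n", color="green")  # prints in ANSI green
```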
api/core/model_runtime/docs/en_US/customizable_model_scale_out.md ADDED
@@ -0,0 +1,310 @@
1
+ ## Custom Integration of Pre-defined Models
2
+
3
+ ### Introduction
4
+
5
+ After completing the vendor integration, the next step is to connect the vendor's models. To illustrate the entire connection process, we will use Xinference as an example to demonstrate a complete vendor integration.
6
+
7
+ It is important to note that for custom models, each model connection requires a complete vendor credential.
8
+
9
+ Unlike pre-defined models, a custom vendor integration always includes the following two parameters, which do not need to be defined in the vendor YAML file.
10
+
11
+ ![](images/index/image-3.png)
12
+
13
+ As mentioned earlier, vendors do not need to implement validate_provider_credential. The runtime will automatically call the corresponding model layer's validate_credentials to validate the credentials based on the model type and name selected by the user.
14
+
15
+ ### Writing the Vendor YAML
16
+
17
+ First, we need to identify the types of models supported by the vendor we are integrating.
18
+
19
+ Currently supported model types are as follows:
20
+
21
+ - `llm` Text Generation Models
22
+
23
+ - `text_embedding` Text Embedding Models
24
+
25
+ - `rerank` Rerank Models
26
+
27
+ - `speech2text` Speech-to-Text
28
+
29
+ - `tts` Text-to-Speech
30
+
31
+ - `moderation` Moderation
32
+
33
+ Xinference supports LLM, Text Embedding, and Rerank. So we will start by writing xinference.yaml.
34
+
35
+ ```yaml
36
+ provider: xinference #Define the vendor identifier
37
+ label: # Vendor display name, supports both en_US (English) and zh_Hans (Simplified Chinese). If zh_Hans is not set, it will use en_US by default.
38
+ en_US: Xorbits Inference
39
+ icon_small: # Small icon, refer to other vendors' icons stored in the _assets directory within the vendor implementation directory; follows the same language policy as the label
40
+ en_US: icon_s_en.svg
41
+ icon_large: # Large icon
42
+ en_US: icon_l_en.svg
43
+ help: # Help information
44
+ title:
45
+ en_US: How to deploy Xinference
46
+ zh_Hans: 如何部署 Xinference
47
+ url:
48
+ en_US: https://github.com/xorbitsai/inference
49
+ supported_model_types: # Supported model types. Xinference supports LLM, Text Embedding, and Rerank
50
+ - llm
51
+ - text-embedding
52
+ - rerank
53
+ configurate_methods: # Since Xinference is a locally deployed vendor with no predefined models, users need to deploy whatever models they need according to Xinference documentation. Thus, it only supports custom models.
54
+ - customizable-model
55
+ provider_credential_schema:
56
+ credential_form_schemas:
57
+ ```
58
+
59
+
60
+ Then, we need to determine what credentials are required to define a model in Xinference.
61
+
62
+ - Since it supports three different types of models, we need to specify the model_type to denote the model type. Here is how we can define it:
63
+
64
+ ```yaml
65
+ provider_credential_schema:
66
+ credential_form_schemas:
67
+ - variable: model_type
68
+ type: select
69
+ label:
70
+ en_US: Model type
71
+ zh_Hans: 模型类型
72
+ required: true
73
+ options:
74
+ - value: text-generation
75
+ label:
76
+ en_US: Language Model
77
+ zh_Hans: 语言模型
78
+ - value: embeddings
79
+ label:
80
+ en_US: Text Embedding
81
+ - value: reranking
82
+ label:
83
+ en_US: Rerank
84
+ ```
85
+
86
+ - Next, each model has its own model_name, so we need to define that here:
87
+
88
+ ```yaml
89
+ - variable: model_name
90
+ type: text-input
91
+ label:
92
+ en_US: Model name
93
+ zh_Hans: 模型名称
94
+ required: true
95
+ placeholder:
96
+ zh_Hans: 填写模型名称
97
+ en_US: Input model name
98
+ ```
99
+
100
+ - Specify the Xinference local deployment address:
101
+
102
+ ```yaml
103
+ - variable: server_url
104
+ label:
105
+ zh_Hans: 服务器URL
106
+ en_US: Server url
107
+ type: text-input
108
+ required: true
109
+ placeholder:
110
+ zh_Hans: 在此输入Xinference的服务器地址,如 https://example.com/xxx
111
+ en_US: Enter the url of your Xinference, for example https://example.com/xxx
112
+ ```
113
+
114
+ - Each model has a unique model_uid, so we also need to define that here:
115
+
116
+ ```yaml
117
+ - variable: model_uid
118
+ label:
119
+ zh_Hans: 模型UID
120
+ en_US: Model uid
121
+ type: text-input
122
+ required: true
123
+ placeholder:
124
+ zh_Hans: 在此输入您的Model UID
125
+ en_US: Enter the model uid
126
+ ```
127
+
128
+ Now, we have completed the basic definition of the vendor.
129
+
130
+ ### Writing the Model Code
131
+
132
+ Next, let's take the `llm` type as an example and write `xinference.llm.llm.py`.
133
+
134
+ In `llm.py`, create a Xinference LLM class named `XinferenceAILargeLanguageModel` (the name is arbitrary), inheriting from the `__base.large_language_model.LargeLanguageModel` base class, and implement the following methods:
135
+
136
+ - LLM Invocation
137
+
138
+ Implement the core method for LLM invocation, supporting both stream and synchronous responses.
139
+
140
+ ```python
141
+ def _invoke(self, model: str, credentials: dict,
142
+ prompt_messages: list[PromptMessage], model_parameters: dict,
143
+ tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
144
+ stream: bool = True, user: Optional[str] = None) \
145
+ -> Union[LLMResult, Generator]:
146
+ """
147
+ Invoke large language model
148
+
149
+ :param model: model name
150
+ :param credentials: model credentials
151
+ :param prompt_messages: prompt messages
152
+ :param model_parameters: model parameters
153
+ :param tools: tools for tool usage
154
+ :param stop: stop words
155
+ :param stream: is the response a stream
156
+ :param user: unique user id
157
+ :return: full response or stream response chunk generator result
158
+ """
159
+ ```
160
+
161
+ When implementing, be sure to use two separate functions to return the data for the synchronous and streaming responses. This is necessary because Python treats any function containing the `yield` keyword as a generator function, which fixes its return type to `Generator`. Here's an example (note that the example uses simplified parameters; in a real implementation, use the parameter list defined above):
162
+
163
+ ```python
164
+ def _invoke(self, stream: bool, **kwargs) \
165
+ -> Union[LLMResult, Generator]:
166
+ if stream:
167
+ return self._handle_stream_response(**kwargs)
168
+ return self._handle_sync_response(**kwargs)
169
+
170
+ def _handle_stream_response(self, **kwargs) -> Generator:
171
+ for chunk in response:
172
+ yield chunk
173
+ def _handle_sync_response(self, **kwargs) -> LLMResult:
174
+ return LLMResult(**response)
175
+ ```
176
+
177
+ - Pre-compute Input Tokens
178
+
179
+ If the model does not provide an interface for pre-computing tokens, you can return 0 directly.
180
+
181
+ ```python
182
+ def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], tools: Optional[list[PromptMessageTool]] = None) -> int:
183
+ """
184
+ Get number of tokens for given prompt messages
185
+
186
+ :param model: model name
187
+ :param credentials: model credentials
188
+ :param prompt_messages: prompt messages
189
+ :param tools: tools for tool usage
190
+ :return: token count
191
+ """
192
+ ```
193
+
194
+
195
+ Sometimes, you might not want to return 0 directly. In such cases, you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens. This method is provided by the `AIModel` base class, and it uses GPT2's Tokenizer for calculation. However, it should be noted that this is only a substitute and may not be fully accurate.
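A sketch of what such a fallback amounts to, using the Hugging Face `transformers` GPT-2 tokenizer (an assumed dependency for illustration; the runtime's built-in helper may be implemented differently):

```python
from transformers import GPT2TokenizerFast

_tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")


def approx_num_tokens(text: str) -> int:
    # Encode with GPT-2's vocabulary; for models with a different tokenizer
    # this is only a rough estimate, not an exact count.
    return len(_tokenizer.encode(text))
```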
196
+
197
+ - Model Credentials Validation
198
+
199
+ Similar to vendor credentials validation, this method validates individual model credentials.
200
+
201
+ ```python
202
+ def validate_credentials(self, model: str, credentials: dict) -> None:
203
+ """
204
+ Validate model credentials
205
+
206
+ :param model: model name
207
+ :param credentials: model credentials
208
+ :return: None
209
+ """
210
+ ```
211
+
212
+ - Model Parameter Schema
213
+
214
+ Unlike predefined models, a custom model's supported parameters are not defined in a YAML file, so we need to generate the model parameter schema dynamically.
215
+
216
+ For instance, Xinference supports `max_tokens`, `temperature`, and `top_p` parameters.
217
+
218
+ However, some vendors may support different parameters for different models. For example, the `OpenLLM` vendor supports `top_k`, but not all models provided by this vendor support `top_k`. Let's say model A supports `top_k` but model B does not. In such cases, we need to dynamically generate the model parameter schema, as illustrated below:
219
+
220
+ ```python
221
+ def get_customizable_model_schema(self, model: str, credentials: dict) -> Optional[AIModelEntity]:
222
+ """
223
+ used to define customizable model schema
224
+ """
225
+ rules = [
226
+ ParameterRule(
227
+ name='temperature', type=ParameterType.FLOAT,
228
+ use_template='temperature',
229
+ label=I18nObject(
230
+ zh_Hans='温度', en_US='Temperature'
231
+ )
232
+ ),
233
+ ParameterRule(
234
+ name='top_p', type=ParameterType.FLOAT,
235
+ use_template='top_p',
236
+ label=I18nObject(
237
+ zh_Hans='Top P', en_US='Top P'
238
+ )
239
+ ),
240
+ ParameterRule(
241
+ name='max_tokens', type=ParameterType.INT,
242
+ use_template='max_tokens',
243
+ min=1,
244
+ default=512,
245
+ label=I18nObject(
246
+ zh_Hans='最大生成长度', en_US='Max Tokens'
247
+ )
248
+ )
249
+ ]
250
+
251
+ # if model is A, add top_k to rules
252
+ if model == 'A':
253
+ rules.append(
254
+ ParameterRule(
255
+ name='top_k', type=ParameterType.INT,
256
+ use_template='top_k',
257
+ min=1,
258
+ default=50,
259
+ label=I18nObject(
260
+ zh_Hans='Top K', en_US='Top K'
261
+ )
262
+ )
263
+ )
264
+
265
+ """
266
+ some NOT IMPORTANT code here
267
+ """
268
+
269
+ entity = AIModelEntity(
270
+ model=model,
271
+ label=I18nObject(
272
+ en_US=model
273
+ ),
274
+ fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
275
+ model_type=model_type,
276
+ model_properties={
277
+ ModelPropertyKey.MODE: ModelType.LLM,
278
+ },
279
+ parameter_rules=rules
280
+ )
281
+
282
+ return entity
283
+ ```
284
+
285
+ - Exception Error Mapping
286
+
287
+ When a model invocation error occurs, it should be mapped to the runtime's specified `InvokeError` type, enabling Dify to handle different errors appropriately.
288
+
289
+ Runtime Errors:
290
+
291
+ - `InvokeConnectionError` Connection error during invocation
292
+ - `InvokeServerUnavailableError` Service provider unavailable
293
+ - `InvokeRateLimitError` Rate limit reached
294
+ - `InvokeAuthorizationError` Authorization failure
295
+ - `InvokeBadRequestError` Invalid request parameters
296
+
297
+ ```python
298
+ @property
299
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
300
+ """
301
+ Map model invoke error to unified error
302
+ The key is the error type thrown to the caller
303
+ The value is the error type thrown by the model,
304
+ which needs to be converted into a unified error type for the caller.
305
+
306
+ :return: Invoke error mapping
307
+ """
308
+ ```
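Concretely, the property returns a dict keyed by the unified error types. A hedged sketch inside the model class (the vendor-side exception classes below are placeholders for whatever the SDK actually raises):

```python
import requests

@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
    return {
        InvokeConnectionError: [requests.exceptions.ConnectionError],
        InvokeServerUnavailableError: [requests.exceptions.HTTPError],
        InvokeRateLimitError: [VendorRateLimitError],  # placeholder SDK exception
        InvokeAuthorizationError: [VendorAuthError],   # placeholder SDK exception
        InvokeBadRequestError: [ValueError],
    }
```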
309
+
310
+ For interface method details, see: [Interfaces](./interfaces.md). For specific implementations, refer to: [llm.py](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/llm/llm.py).
api/core/model_runtime/docs/en_US/images/index/image-1.png ADDED

Git LFS Details

  • SHA256: aa7b8b17348097b21e33016b526285c74549411ac1f06f3b679acb1a0a94ed9f
  • Pointer size: 131 Bytes
  • Size of remote file: 235 kB
api/core/model_runtime/docs/en_US/images/index/image-2.png ADDED

Git LFS Details

  • SHA256: f90954338d7931b5e3d72cab0b10750f25767a10c08defae3e521cb3c82decff
  • Pointer size: 131 Bytes
  • Size of remote file: 210 kB
api/core/model_runtime/docs/en_US/images/index/image-20231210143654461.png ADDED

Git LFS Details

  • SHA256: ae056bfb847b7d5b99bc004b9574de8967a2b3cad825764d9d9a24e4b85bbcb2
  • Pointer size: 131 Bytes
  • Size of remote file: 379 kB
api/core/model_runtime/docs/en_US/images/index/image-20231210144229650.png ADDED

Git LFS Details

  • SHA256: fe26d95f692868822fee5601a6f7163cbd5da01c7e6436c5e8dd55a8ea1e9f75
  • Pointer size: 131 Bytes
  • Size of remote file: 115 kB
api/core/model_runtime/docs/en_US/images/index/image-20231210144814617.png ADDED

Git LFS Details

  • SHA256: 1fd647063c5900fc541bfed8477a7122acbc46d6a296c0e84fb49dea984733b3
  • Pointer size: 131 Bytes
  • Size of remote file: 111 kB
api/core/model_runtime/docs/en_US/images/index/image-20231210151548521.png ADDED
api/core/model_runtime/docs/en_US/images/index/image-20231210151628992.png ADDED
api/core/model_runtime/docs/en_US/images/index/image-20231210165243632.png ADDED

Git LFS Details

  • SHA256: dc4ae12b6d85610e2e70a07dfc20f9877d752490d706b0fa3682dfb211a7d844
  • Pointer size: 131 Bytes
  • Size of remote file: 554 kB
api/core/model_runtime/docs/en_US/images/index/image-3.png ADDED
api/core/model_runtime/docs/en_US/images/index/image.png ADDED

Git LFS Details

  • SHA256: 6008899b8b7b57af7c7e83509a6116dbc3f2c6cf040ae67eb1a1ae1fb78c3ede
  • Pointer size: 131 Bytes
  • Size of remote file: 268 kB
api/core/model_runtime/docs/en_US/interfaces.md ADDED
@@ -0,0 +1,706 @@
1
+ # Interface Methods
2
+
3
+ This section describes the interface methods that providers and each model type need to implement, along with explanations of their parameters.
4
+
5
+ ## Provider
6
+
7
+ Inherit the `__base.model_provider.ModelProvider` base class and implement the following interfaces:
8
+
9
+ ```python
10
+ def validate_provider_credentials(self, credentials: dict) -> None:
11
+ """
12
+ Validate provider credentials
13
+ You can choose any validate_credentials method of model type or implement validate method by yourself,
14
+ such as: get model list api
15
+
16
+ if validate failed, raise exception
17
+
18
+ :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
19
+ """
20
+ ```
21
+
22
+ - `credentials` (object) Credential information
23
+
24
+ The parameters of credential information are defined by the `provider_credential_schema` in the provider's YAML configuration file. Inputs such as `api_key` are included.
25
+
26
+ If verification fails, throw the `errors.validate.CredentialsValidateFailedError` error.
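In practice this is often implemented by delegating to a single model-level check. A sketch (the `get_model_instance` helper and the probe model name are assumptions for illustration):

```python
def validate_provider_credentials(self, credentials: dict) -> None:
    try:
        # Reuse the LLM class's credential check against one known model.
        model_instance = self.get_model_instance(ModelType.LLM)  # assumed helper
        model_instance.validate_credentials(model="claude-2.1", credentials=credentials)
    except CredentialsValidateFailedError:
        raise
    except Exception as ex:
        raise CredentialsValidateFailedError(str(ex))
```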
27
+
28
+ ## Model
29
+
30
+ Models are divided into 6 different types, each inheriting from a different base class and requiring the implementation of different methods.
31
+
32
+ All models need to uniformly implement the following 2 methods:
33
+
34
+ - Model Credential Verification
35
+
36
+ Similar to provider credential verification, this step involves verification for an individual model.
37
+
38
+
39
+ ```python
40
+ def validate_credentials(self, model: str, credentials: dict) -> None:
41
+ """
42
+ Validate model credentials
43
+
44
+ :param model: model name
45
+ :param credentials: model credentials
46
+ :return:
47
+ """
48
+ ```
49
+
50
+ Parameters:
51
+
52
+ - `model` (string) Model name
53
+
54
+ - `credentials` (object) Credential information
55
+
56
+ The parameters of credential information are defined by either the `provider_credential_schema` or `model_credential_schema` in the provider's YAML configuration file. Inputs such as `api_key` are included.
57
+
58
+ If verification fails, throw the `errors.validate.CredentialsValidateFailedError` error.
59
+
60
+ - Invocation Error Mapping Table
61
+
62
+ When there is an exception in model invocation, it needs to be mapped to the `InvokeError` type specified by Runtime. This facilitates Dify's ability to handle different errors with appropriate follow-up actions.
63
+
64
+ Runtime Errors:
65
+
66
+ - `InvokeConnectionError` Invocation connection error
67
+ - `InvokeServerUnavailableError` Invocation service provider unavailable
68
+ - `InvokeRateLimitError` Invocation reached rate limit
69
+ - `InvokeAuthorizationError` Invocation authorization failure
70
+ - `InvokeBadRequestError` Invocation parameter error
71
+
72
+ ```python
73
+ @property
74
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
75
+ """
76
+ Map model invoke error to unified error
77
+ The key is the error type thrown to the caller
78
+ The value is the error type thrown by the model,
79
+ which needs to be converted into a unified error type for the caller.
80
+
81
+ :return: Invoke error mapping
82
+ """
83
+ ```
84
+
85
+ You can refer to OpenAI's `_invoke_error_mapping` for an example.
86
+
87
+ ### LLM
88
+
89
+ Inherit the `__base.large_language_model.LargeLanguageModel` base class and implement the following interfaces:
90
+
91
+ - LLM Invocation
92
+
93
+ Implement the core method for LLM invocation, which can support both streaming and synchronous returns.
94
+
95
+
96
+ ```python
97
+ def _invoke(self, model: str, credentials: dict,
98
+ prompt_messages: list[PromptMessage], model_parameters: dict,
99
+ tools: Optional[list[PromptMessageTool]] = None, stop: Optional[List[str]] = None,
100
+ stream: bool = True, user: Optional[str] = None) \
101
+ -> Union[LLMResult, Generator]:
102
+ """
103
+ Invoke large language model
104
+
105
+ :param model: model name
106
+ :param credentials: model credentials
107
+ :param prompt_messages: prompt messages
108
+ :param model_parameters: model parameters
109
+ :param tools: tools for tool calling
110
+ :param stop: stop words
111
+ :param stream: is stream response
112
+ :param user: unique user id
113
+ :return: full response or stream response chunk generator result
114
+ """
115
+ ```
116
+
117
+ - Parameters:
118
+
119
+ - `model` (string) Model name
120
+
121
+ - `credentials` (object) Credential information
122
+
123
+ The parameters of credential information are defined by either the `provider_credential_schema` or `model_credential_schema` in the provider's YAML configuration file. Inputs such as `api_key` are included.
124
+
125
+ - `prompt_messages` (array[[PromptMessage](#PromptMessage)]) List of prompts
126
+
127
+ If the model is of the `Completion` type, the list only needs to include one [UserPromptMessage](#UserPromptMessage) element;
128
+
129
+ If the model is of the `Chat` type, it requires a list of elements such as [SystemPromptMessage](#SystemPromptMessage), [UserPromptMessage](#UserPromptMessage), [AssistantPromptMessage](#AssistantPromptMessage), [ToolPromptMessage](#ToolPromptMessage) depending on the message.
130
+
131
+ - `model_parameters` (object) Model parameters
132
+
133
+ The model parameters are defined by the `parameter_rules` in the model's YAML configuration.
134
+
135
+ - `tools` (array[[PromptMessageTool](#PromptMessageTool)]) [optional] List of tools, equivalent to the `function` in `function calling`.
136
+
137
+ That is, the tool list for tool calling.
138
+
139
+ - `stop` (array[string]) [optional] Stop sequences
140
+
141
+ The model output will stop before the string defined by the stop sequence.
142
+
143
+ - `stream` (bool) Whether to output in a streaming manner, default is True
144
+
145
+ Streaming output returns Generator[[LLMResultChunk](#LLMResultChunk)], non-streaming output returns [LLMResult](#LLMResult).
146
+
147
+ - `user` (string) [optional] Unique identifier of the user
148
+
149
+ This can help the provider monitor and detect abusive behavior.
150
+
151
+ - Returns
152
+
153
+ Streaming output returns Generator[[LLMResultChunk](#LLMResultChunk)], non-streaming output returns [LLMResult](#LLMResult).
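Callers typically branch on the runtime type of the returned value. A sketch (the public `invoke` wrapper and its full argument list are abbreviated here):

```python
from collections.abc import Generator

result = llm.invoke(model=model, credentials=credentials,
                    prompt_messages=prompt_messages, model_parameters={}, stream=True)
if isinstance(result, Generator):
    for chunk in result:             # each item is an LLMResultChunk
        print(chunk.delta.message.content, end="")
else:
    print(result.message.content)    # a single LLMResult
```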
154
+
155
+ - Pre-calculating Input Tokens
156
+
157
+ If the model does not provide a pre-calculated tokens interface, you can directly return 0.
158
+
159
+ ```python
160
+ def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
161
+ tools: Optional[list[PromptMessageTool]] = None) -> int:
162
+ """
163
+ Get number of tokens for given prompt messages
164
+
165
+ :param model: model name
166
+ :param credentials: model credentials
167
+ :param prompt_messages: prompt messages
168
+ :param tools: tools for tool calling
169
+ :return:
170
+ """
171
+ ```
172
+
173
+ For parameter explanations, refer to the above section on `LLM Invocation`.
174
+
175
+ - Fetch Custom Model Schema [Optional]
176
+
177
+ ```python
178
+ def get_customizable_model_schema(self, model: str, credentials: dict) -> Optional[AIModelEntity]:
179
+ """
180
+ Get customizable model schema
181
+
182
+ :param model: model name
183
+ :param credentials: model credentials
184
+ :return: model schema
185
+ """
186
+ ```
187
+
188
+ When the provider supports adding custom LLMs, this method can be implemented so that custom models can fetch their model schema. The default is to return None.
189
+
190
+
191
+ ### TextEmbedding
192
+
193
+ Inherit the `__base.text_embedding_model.TextEmbeddingModel` base class and implement the following interfaces:
194
+
195
+ - Embedding Invocation
196
+
197
+ ```python
198
+ def _invoke(self, model: str, credentials: dict,
199
+ texts: list[str], user: Optional[str] = None) \
200
+ -> TextEmbeddingResult:
201
+ """
202
+ Invoke text embedding model
203
+
204
+ :param model: model name
205
+ :param credentials: model credentials
206
+ :param texts: texts to embed
207
+ :param user: unique user id
208
+ :return: embeddings result
209
+ """
210
+ ```
211
+
212
+ - Parameters:
213
+
214
+ - `model` (string) Model name
215
+
216
+ - `credentials` (object) Credential information
217
+
218
+ The parameters of credential information are defined by either the `provider_credential_schema` or `model_credential_schema` in the provider's YAML configuration file. Inputs such as `api_key` are included.
219
+
220
+ - `texts` (array[string]) List of texts, capable of batch processing
221
+
222
+ - `user` (string) [optional] Unique identifier of the user
223
+
224
+ This can help the provider monitor and detect abusive behavior.
225
+
226
+ - Returns:
227
+
228
+ [TextEmbeddingResult](#TextEmbeddingResult) entity.
229
+
230
+ - Pre-calculating Tokens
231
+
232
+ ```python
233
+ def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
234
+ """
235
+ Get number of tokens for given prompt messages
236
+
237
+ :param model: model name
238
+ :param credentials: model credentials
239
+ :param texts: texts to embed
240
+ :return:
241
+ """
242
+ ```
243
+
244
+ For parameter explanations, refer to the above section on `Embedding Invocation`.
245
+
246
+ ### Rerank
247
+
248
+ Inherit the `__base.rerank_model.RerankModel` base class and implement the following interfaces:
249
+
250
+ - Rerank Invocation
251
+
252
+ ```python
253
+ def _invoke(self, model: str, credentials: dict,
254
+ query: str, docs: list[str], score_threshold: Optional[float] = None, top_n: Optional[int] = None,
255
+ user: Optional[str] = None) \
256
+ -> RerankResult:
257
+ """
258
+ Invoke rerank model
259
+
260
+ :param model: model name
261
+ :param credentials: model credentials
262
+ :param query: search query
263
+ :param docs: docs for reranking
264
+ :param score_threshold: score threshold
265
+ :param top_n: top n
266
+ :param user: unique user id
267
+ :return: rerank result
268
+ """
269
+ ```
270
+
271
+ - Parameters:
272
+
273
+ - `model` (string) Model name
274
+
275
+ - `credentials` (object) Credential information
276
+
277
+ The parameters of credential information are defined by either the `provider_credential_schema` or `model_credential_schema` in the provider's YAML configuration file. Inputs such as `api_key` are included.
278
+
279
+ - `query` (string) Query request content
280
+
281
+ - `docs` (array[string]) List of segments to be reranked
282
+
283
+ - `score_threshold` (float) [optional] Score threshold
284
+
285
+ - `top_n` (int) [optional] Select the top n segments
286
+
287
+ - `user` (string) [optional] Unique identifier of the user
288
+
289
+ This can help the provider monitor and detect abusive behavior.
290
+
291
+ - Returns:
292
+
293
+ [RerankResult](#RerankResult) entity.
294
+
295
+ ### Speech2text
296
+
297
+ Inherit the `__base.speech2text_model.Speech2TextModel` base class and implement the following interfaces:
298
+
299
+ - Invocation
300
+
301
+ ```python
302
+ def _invoke(self, model: str, credentials: dict, file: IO[bytes], user: Optional[str] = None) -> str:
303
+ """
304
+ Invoke speech-to-text model
305
+
306
+ :param model: model name
307
+ :param credentials: model credentials
308
+ :param file: audio file
309
+ :param user: unique user id
310
+ :return: text for given audio file
311
+ """
312
+ ```
313
+
314
+ - Parameters:
315
+
316
+ - `model` (string) Model name
317
+
318
+ - `credentials` (object) Credential information
319
+
320
+ The parameters of credential information are defined by either the `provider_credential_schema` or `model_credential_schema` in the provider's YAML configuration file. Inputs such as `api_key` are included.
321
+
322
+ - `file` (File) File stream
323
+
324
+ - `user` (string) [optional] Unique identifier of the user
325
+
326
+ This can help the provider monitor and detect abusive behavior.
327
+
328
+ - Returns:
329
+
330
+ The string after speech-to-text conversion.
331
+
332
+ ### Text2speech
333
+
334
+ Inherit the `__base.text2speech_model.Text2SpeechModel` base class and implement the following interfaces:
335
+
336
+ - Invocation
337
+
338
+ ```python
339
+ def _invoke(self, model: str, credentials: dict, content_text: str, streaming: bool, user: Optional[str] = None):
340
+ """
341
+ Invoke text-to-speech model
342
+
343
+ :param model: model name
344
+ :param credentials: model credentials
345
+ :param content_text: text content to be translated
346
+ :param streaming: output is streaming
347
+ :param user: unique user id
348
+ :return: translated audio file
349
+ """
350
+ ```
351
+
352
+ - Parameters:
353
+
354
+ - `model` (string) Model name
355
+
356
+ - `credentials` (object) Credential information
357
+
358
+ The parameters of credential information are defined by either the `provider_credential_schema` or `model_credential_schema` in the provider's YAML configuration file. Inputs such as `api_key` are included.
359
+
360
+ - `content_text` (string) The text content that needs to be converted
361
+
362
+ - `streaming` (bool) Whether to stream output
363
+
364
+ - `user` (string) [optional] Unique identifier of the user
365
+
366
+ This can help the provider monitor and detect abusive behavior.
367
+
368
+ - Returns:
369
+
370
+ The speech stream converted from the input text.
371
+
372
+ ### Moderation
373
+
374
+ Inherit the `__base.moderation_model.ModerationModel` base class and implement the following interfaces:
375
+
376
+ - Invocation
377
+
378
+ ```python
379
+ def _invoke(self, model: str, credentials: dict,
380
+ text: str, user: Optional[str] = None) \
381
+ -> bool:
382
+ """
383
+ Invoke moderation model
384
+
385
+ :param model: model name
386
+ :param credentials: model credentials
387
+ :param text: text to moderate
388
+ :param user: unique user id
389
+ :return: false if text is safe, true otherwise
390
+ """
391
+ ```
392
+
393
+ - Parameters:
394
+
395
+ - `model` (string) Model name
396
+
397
+ - `credentials` (object) Credential information
398
+
399
+ The parameters of credential information are defined by either the `provider_credential_schema` or `model_credential_schema` in the provider's YAML configuration file. Inputs such as `api_key` are included.
400
+
401
+ - `text` (string) Text content
402
+
403
+ - `user` (string) [optional] Unique identifier of the user
404
+
405
+ This can help the provider monitor and detect abusive behavior.
406
+
407
+ - Returns:
408
+
409
+ False indicates that the input text is safe, True indicates otherwise.
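A caller would use the returned flag like this (a sketch; the public `invoke` wrapper, the `moderation_model` instance, and the model name are assumptions):

```python
flagged = moderation_model.invoke(model="text-moderation-stable",
                                  credentials=credentials, text=user_input)
if flagged:
    # True means the text was judged unsafe; block or sanitize it.
    raise ValueError("Input rejected by moderation")
```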
410
+
411
+
412
+
413
+ ## Entities
414
+
415
+ ### PromptMessageRole
416
+
417
+ Message role
418
+
419
+ ```python
420
+ class PromptMessageRole(Enum):
421
+ """
422
+ Enum class for prompt message.
423
+ """
424
+ SYSTEM = "system"
425
+ USER = "user"
426
+ ASSISTANT = "assistant"
427
+ TOOL = "tool"
428
+ ```
429
+
430
+ ### PromptMessageContentType
431
+
432
+ Message content types, divided into text and image.
433
+
434
+ ```python
435
+ class PromptMessageContentType(Enum):
436
+ """
437
+ Enum class for prompt message content type.
438
+ """
439
+ TEXT = 'text'
440
+ IMAGE = 'image'
441
+ ```
442
+
443
+ ### PromptMessageContent
444
+
445
+ Message content base class, used only for parameter declaration and cannot be initialized.
446
+
447
+ ```python
448
+ class PromptMessageContent(BaseModel):
449
+ """
450
+ Model class for prompt message content.
451
+ """
452
+ type: PromptMessageContentType
453
+ data: str
454
+ ```
455
+
456
+ Currently, two types are supported: text and image. It's possible to simultaneously input text and multiple images.
457
+
458
+ You need to initialize `TextPromptMessageContent` and `ImagePromptMessageContent` separately for input.
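For example, a text-plus-image user message is assembled like this (a sketch; the URL is a placeholder):

```python
message = UserPromptMessage(
    content=[
        TextPromptMessageContent(data="Describe this picture."),
        ImagePromptMessageContent(
            data="https://example.com/cat.png",  # a URL or base64-encoded image
            detail=ImagePromptMessageContent.DETAIL.HIGH,
        ),
    ]
)
```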
459
+
460
+ ### TextPromptMessageContent
461
+
462
+ ```python
463
+ class TextPromptMessageContent(PromptMessageContent):
464
+ """
465
+ Model class for text prompt message content.
466
+ """
467
+ type: PromptMessageContentType = PromptMessageContentType.TEXT
468
+ ```
469
+
470
+ If inputting a combination of text and images, the text needs to be constructed into this entity as part of the `content` list.
471
+
472
+ ### ImagePromptMessageContent
473
+
474
+ ```python
475
+ class ImagePromptMessageContent(PromptMessageContent):
476
+ """
477
+ Model class for image prompt message content.
478
+ """
479
+ class DETAIL(Enum):
480
+ LOW = 'low'
481
+ HIGH = 'high'
482
+
483
+ type: PromptMessageContentType = PromptMessageContentType.IMAGE
484
+ detail: DETAIL = DETAIL.LOW # Resolution
485
+ ```
486
+
487
+ If inputting a combination of text and images, the images need to be constructed into this entity as part of the `content` list.
488
+
489
+ `data` can be either a `url` or a `base64` encoded string of the image.
490
+
491
+ ### PromptMessage
492
+
493
+ The base class for all Role message bodies, used only for parameter declaration and cannot be initialized.
494
+
495
+ ```python
496
+ class PromptMessage(ABC, BaseModel):
497
+ """
498
+ Model class for prompt message.
499
+ """
500
+ role: PromptMessageRole
501
+ content: Optional[str | list[PromptMessageContent]] = None # Supports two types: string and content list. The content list is designed to meet the needs of multimodal inputs. For more details, see the PromptMessageContent explanation.
502
+ name: Optional[str] = None
503
+ ```
504
+
505
+ ### UserPromptMessage
506
+
507
+ UserMessage message body, representing a user's message.
508
+
509
+ ```python
510
+ class UserPromptMessage(PromptMessage):
511
+ """
512
+ Model class for user prompt message.
513
+ """
514
+ role: PromptMessageRole = PromptMessageRole.USER
515
+ ```
516
+
517
+ ### AssistantPromptMessage
518
+
519
+ Represents a message returned by the model, typically used for `few-shots` or inputting chat history.
520
+
521
+ ```python
522
+ class AssistantPromptMessage(PromptMessage):
523
+ """
524
+ Model class for assistant prompt message.
525
+ """
526
+ class ToolCall(BaseModel):
527
+ """
528
+ Model class for assistant prompt message tool call.
529
+ """
530
+ class ToolCallFunction(BaseModel):
531
+ """
532
+ Model class for assistant prompt message tool call function.
533
+ """
534
+ name: str # tool name
535
+ arguments: str # tool arguments
536
+
537
+ id: str # Tool ID, effective only in OpenAI tool calls. It's the unique ID for tool invocation and the same tool can be called multiple times.
538
+ type: str # default: function
539
+ function: ToolCallFunction # tool call information
540
+
541
+ role: PromptMessageRole = PromptMessageRole.ASSISTANT
542
+ tool_calls: list[ToolCall] = [] # The result of tool invocation in response from the model (returned only when tools are input and the model deems it necessary to invoke a tool).
543
+ ```
544
+
545
+ Where `tool_calls` are the list of `tool calls` returned by the model after invoking the model with the `tools` input.
546
+
547
+ ### SystemPromptMessage
548
+
549
+ Represents system messages, usually used for setting system commands given to the model.
550
+
551
+ ```python
552
+ class SystemPromptMessage(PromptMessage):
553
+ """
554
+ Model class for system prompt message.
555
+ """
556
+ role: PromptMessageRole = PromptMessageRole.SYSTEM
557
+ ```
558
+
559
+ ### ToolPromptMessage
560
+
561
+ Represents tool messages, used for conveying the results of a tool execution to the model for the next step of processing.
562
+
563
+ ```python
564
+ class ToolPromptMessage(PromptMessage):
565
+ """
566
+ Model class for tool prompt message.
567
+ """
568
+ role: PromptMessageRole = PromptMessageRole.TOOL
569
+ tool_call_id: str # Tool invocation ID. If OpenAI tool call is not supported, the name of the tool can also be inputted.
570
+ ```
571
+
572
+ The base class's `content` takes in the results of tool execution.
573
+
574
+ ### PromptMessageTool
575
+
576
+ ```python
577
+ class PromptMessageTool(BaseModel):
578
+ """
579
+ Model class for prompt message tool.
580
+ """
581
+ name: str
582
+ description: str
583
+ parameters: dict
584
+ ```
585
+
586
+ ---
587
+
588
+ ### LLMResult
589
+
590
+ ```python
591
+ class LLMResult(BaseModel):
592
+ """
593
+ Model class for llm result.
594
+ """
595
+ model: str # Actual model used
596
+ prompt_messages: list[PromptMessage] # prompt messages
597
+ message: AssistantPromptMessage # response message
598
+ usage: LLMUsage # usage info
599
+ system_fingerprint: Optional[str] = None # request fingerprint, refer to OpenAI definition
600
+ ```
601
+
602
+ ### LLMResultChunkDelta
603
+
604
+ In streaming returns, each iteration contains the `delta` entity.
605
+
606
+ ```python
607
+ class LLMResultChunkDelta(BaseModel):
608
+ """
609
+ Model class for llm result chunk delta.
610
+ """
611
+ index: int
612
+ message: AssistantPromptMessage # response message
613
+ usage: Optional[LLMUsage] = None # usage info
614
+ finish_reason: Optional[str] = None # finish reason, only the last one returns
615
+ ```
616
+
617
+ ### LLMResultChunk
618
+
619
+ Each iteration entity in streaming returns.
620
+
621
+ ```python
622
+ class LLMResultChunk(BaseModel):
623
+ """
624
+ Model class for llm result chunk.
625
+ """
626
+ model: str # Actual model used
627
+ prompt_messages: list[PromptMessage] # prompt messages
628
+ system_fingerprint: Optional[str] = None # request fingerprint, refer to OpenAI definition
629
+ delta: LLMResultChunkDelta
630
+ ```
631
+
632
+ ### LLMUsage
633
+
634
+ ```python
635
+ class LLMUsage(ModelUsage):
636
+ """
637
+ Model class for LLM usage.
638
+ """
639
+ prompt_tokens: int # Tokens used for prompt
640
+ prompt_unit_price: Decimal # Unit price for prompt
641
+ prompt_price_unit: Decimal # Price unit for prompt, i.e., the unit price based on how many tokens
642
+ prompt_price: Decimal # Cost for prompt
643
+ completion_tokens: int # Tokens used for response
644
+ completion_unit_price: Decimal # Unit price for response
645
+ completion_price_unit: Decimal # Price unit for response, i.e., the unit price based on how many tokens
646
+ completion_price: Decimal # Cost for response
647
+ total_tokens: int # Total number of tokens used
648
+ total_price: Decimal # Total cost
649
+ currency: str # Currency unit
650
+ latency: float # Request latency (s)
651
+ ```
652
+
653
+ ---
654
+
655
+ ### TextEmbeddingResult
656
+
657
+ ```python
658
+ class TextEmbeddingResult(BaseModel):
659
+ """
660
+ Model class for text embedding result.
661
+ """
662
+ model: str # Actual model used
663
+ embeddings: list[list[float]] # List of embedding vectors, corresponding to the input texts list
664
+ usage: EmbeddingUsage # Usage information
665
+ ```
666
+
667
+ ### EmbeddingUsage
668
+
669
+ ```python
670
+ class EmbeddingUsage(ModelUsage):
671
+ """
672
+ Model class for embedding usage.
673
+ """
674
+ tokens: int # Number of tokens used
675
+ total_tokens: int # Total number of tokens used
676
+ unit_price: Decimal # Unit price
677
+ price_unit: Decimal # Price unit, i.e., the unit price based on how many tokens
678
+ total_price: Decimal # Total cost
679
+ currency: str # Currency unit
680
+ latency: float # Request latency (s)
681
+ ```
682
+
683
+ ---
684
+
685
+ ### RerankResult
686
+
687
+ ```python
688
+ class RerankResult(BaseModel):
689
+ """
690
+ Model class for rerank result.
691
+ """
692
+ model: str # Actual model used
693
+ docs: list[RerankDocument] # Reranked document list
694
+ ```
695
+
696
+ ### RerankDocument
697
+
698
+ ```python
699
+ class RerankDocument(BaseModel):
700
+ """
701
+ Model class for rerank document.
702
+ """
703
+ index: int # original index
704
+ text: str
705
+ score: float
706
+ ```
api/core/model_runtime/docs/en_US/predefined_model_scale_out.md ADDED
@@ -0,0 +1,173 @@
1
+ ## Predefined Model Integration
2
+
3
+ After completing the vendor integration, the next step is to integrate the models from the vendor.
4
+
5
+ First, we need to determine the type of model to be integrated and create the corresponding model type `module` under the respective vendor's directory.
6
+
7
+ Currently supported model types are:
8
+
9
+ - `llm` Text Generation Model
10
+ - `text_embedding` Text Embedding Model
11
+ - `rerank` Rerank Model
12
+ - `speech2text` Speech-to-Text
13
+ - `tts` Text-to-Speech
14
+ - `moderation` Moderation
15
+
16
+ Continuing with `Anthropic` as an example, `Anthropic` only supports LLM, so create a `module` named `llm` under `model_providers.anthropic`.
17
+
18
+ For predefined models, we first need to create a YAML file named after the model under the `llm` `module`, such as `claude-2.1.yaml`.
19
+
20
+ ### Prepare Model YAML
21
+
22
+ ```yaml
23
+ model: claude-2.1 # Model identifier
24
+ # Display name of the model, which can be set to en_US English or zh_Hans Chinese. If zh_Hans is not set, it will default to en_US.
25
+ # This can also be omitted, in which case the model identifier will be used as the label
26
+ label:
27
+ en_US: claude-2.1
28
+ model_type: llm # Model type, claude-2.1 is an LLM
29
+ features: # Supported features, agent-thought supports Agent reasoning, vision supports image understanding
30
+ - agent-thought
31
+ model_properties: # Model properties
32
+ mode: chat # LLM mode, complete for text completion models, chat for conversation models
33
+ context_size: 200000 # Maximum context size
34
+ parameter_rules: # Parameter rules for the model call; only LLM requires this
35
+ - name: temperature # Parameter variable name
36
+ # Five default configuration templates are provided: temperature/top_p/max_tokens/presence_penalty/frequency_penalty
37
+ # The template variable name can be set directly in use_template, which will use the default configuration in entities.defaults.PARAMETER_RULE_TEMPLATE
38
+ # Additional configuration parameters will override the default configuration if set
39
+ use_template: temperature
40
+ - name: top_p
41
+ use_template: top_p
42
+ - name: top_k
43
+ label: # Display name of the parameter
44
+ zh_Hans: 取样数量
45
+ en_US: Top k
46
+ type: int # Parameter type, supports float/int/string/boolean
47
+ help: # Help information, describing the parameter's function
48
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
49
+ en_US: Only sample from the top K options for each subsequent token.
50
+ required: false # Whether the parameter is mandatory; can be omitted
51
+ - name: max_tokens_to_sample
52
+ use_template: max_tokens
53
+ default: 4096 # Default value of the parameter
54
+ min: 1 # Minimum value of the parameter, applicable to float/int only
55
+ max: 4096 # Maximum value of the parameter, applicable to float/int only
56
+ pricing: # Pricing information
57
+ input: '8.00' # Input unit price, i.e., prompt price
58
+ output: '24.00' # Output unit price, i.e., response content price
59
+ unit: '0.000001' # Price unit, meaning the above prices are per 1M tokens
60
+ currency: USD # Price currency
61
+ ```
62
+
63
+ It is recommended to prepare all model configurations before starting the implementation of the model code.
64
+
65
+ You can also refer to the YAML configuration information under the corresponding model type directories of other vendors in the `model_providers` directory. For the complete YAML rules, refer to: [Schema](schema.md#aimodelentity).
66
+
67
+ ### Implement the Model Call Code
68
+
69
+ Next, create a Python file named `llm.py` under the `llm` `module` to write the implementation code.
70
+
71
+ Create an Anthropic LLM class named `AnthropicLargeLanguageModel` (or any other name), inheriting from the `__base.large_language_model.LargeLanguageModel` base class, and implement the following methods:
72
+
73
+ - LLM Call
74
+
75
+ Implement the core method for calling the LLM, supporting both streaming and synchronous responses.
76
+
77
+ ```python
78
+ def _invoke(self, model: str, credentials: dict,
79
+ prompt_messages: list[PromptMessage], model_parameters: dict,
80
+ tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
81
+ stream: bool = True, user: Optional[str] = None) \
82
+ -> Union[LLMResult, Generator]:
83
+ """
84
+ Invoke large language model
85
+
86
+ :param model: model name
87
+ :param credentials: model credentials
88
+ :param prompt_messages: prompt messages
89
+ :param model_parameters: model parameters
90
+ :param tools: tools for tool calling
91
+ :param stop: stop words
92
+ :param stream: is stream response
93
+ :param user: unique user id
94
+ :return: full response or stream response chunk generator result
95
+ """
96
+ ```
97
+
98
+ Be sure to use two separate functions for returning data, one for synchronous returns and one for streaming returns. Because Python treats any function containing the `yield` keyword as a generator function whose return type is fixed to `Generator`, the two paths must be implemented separately, as shown below (note that the example uses simplified parameters; an actual implementation should follow the parameter list above):
99
+
100
+ ```python
101
+ def _invoke(self, stream: bool, **kwargs) \
102
+ -> Union[LLMResult, Generator]:
103
+ if stream:
104
+ return self._handle_stream_response(**kwargs)
105
+ return self._handle_sync_response(**kwargs)
106
+
107
+ def _handle_stream_response(self, **kwargs) -> Generator:
108
+ for chunk in response:
109
+ yield chunk
110
+ def _handle_sync_response(self, **kwargs) -> LLMResult:
111
+ return LLMResult(**response)
112
+ ```
113
+
114
+ - Pre-compute Input Tokens
115
+
116
+ If the model does not provide an interface to precompute tokens, return 0 directly.
117
+
118
+ ```python
119
+ def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
120
+ tools: Optional[list[PromptMessageTool]] = None) -> int:
121
+ """
122
+ Get number of tokens for given prompt messages
123
+
124
+ :param model: model name
125
+ :param credentials: model credentials
126
+ :param prompt_messages: prompt messages
127
+ :param tools: tools for tool calling
128
+ :return:
129
+ """
130
+ ```
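+
+ As a minimal sketch, a fallback implementation could reuse the `_get_num_tokens_by_gpt2` helper inherited from the `AIModel` base class (a GPT-2-based approximation; results are rough estimates only):
+
+ ```python
+ def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
+                    tools: Optional[list[PromptMessageTool]] = None) -> int:
+     # Rough estimate: join all message contents and count them with the
+     # GPT-2 tokenizer helper provided by the AIModel base class.
+     text = " ".join(str(message.content) for message in prompt_messages)
+     return self._get_num_tokens_by_gpt2(text)
+ ```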
131
+
132
+ - Validate Model Credentials
133
+
134
+ Similar to vendor credential validation, but specific to a single model.
135
+
136
+ ```python
137
+ def validate_credentials(self, model: str, credentials: dict) -> None:
138
+ """
139
+ Validate model credentials
140
+
141
+ :param model: model name
142
+ :param credentials: model credentials
143
+ :return:
144
+ """
145
+ ```
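+
+ As a sketch, the check can issue a cheap end-to-end call and convert any failure into `CredentialsValidateFailedError` (from `errors.validate`); the "ping" prompt and the tiny `max_tokens_to_sample` value are illustrative:
+
+ ```python
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+     try:
+         # A minimal real call is usually enough to prove the credentials work.
+         self._invoke(model=model, credentials=credentials,
+                      prompt_messages=[UserPromptMessage(content="ping")],
+                      model_parameters={"max_tokens_to_sample": 5},
+                      stream=False)
+     except Exception as ex:
+         raise CredentialsValidateFailedError(str(ex))
+ ```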
146
+
147
+ - Map Invoke Errors
148
+
149
+ When a model call fails, map it to a specific `InvokeError` type as required by Runtime, allowing Dify to handle different errors accordingly.
150
+
151
+ Runtime Errors:
152
+
153
+ - `InvokeConnectionError` Connection error
154
+
155
+ - `InvokeServerUnavailableError` Service provider unavailable
156
+ - `InvokeRateLimitError` Rate limit reached
157
+ - `InvokeAuthorizationError` Authorization failed
158
+ - `InvokeBadRequestError` Parameter error
159
+
160
+ ```python
161
+ @property
162
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
163
+ """
164
+ Map model invoke error to unified error
165
+ The key is the error type thrown to the caller
166
+ The value is the error type thrown by the model,
167
+ which needs to be converted into a unified error type for the caller.
168
+
169
+ :return: Invoke error mapping
170
+ """
171
+ ```
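+
+ As a sketch, assuming the `anthropic` Python SDK is imported at module level and exposes the exception classes below (verify against the SDK version you target), the mapping might look like:
+
+ ```python
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+     # Assumed anthropic SDK exception classes; adjust to the SDK you use.
+     return {
+         InvokeConnectionError: [anthropic.APIConnectionError],
+         InvokeServerUnavailableError: [anthropic.InternalServerError],
+         InvokeRateLimitError: [anthropic.RateLimitError],
+         InvokeAuthorizationError: [anthropic.AuthenticationError,
+                                    anthropic.PermissionDeniedError],
+         InvokeBadRequestError: [anthropic.BadRequestError],
+     }
+ ```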
172
+
173
+ For interface method explanations, see: [Interfaces](./interfaces.md). For detailed implementation, refer to: [llm.py](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/llm/llm.py).
api/core/model_runtime/docs/en_US/provider_scale_out.md ADDED
@@ -0,0 +1,265 @@
1
+ ## Adding a New Provider
2
+
3
+ Providers support three types of model configuration methods:
4
+
5
+ - `predefined-model` Predefined model
6
+
7
+ This indicates that users only need to configure the unified provider credentials to use the predefined models under the provider.
8
+
9
+ - `customizable-model` Customizable model
10
+
11
+ Users need to add credential configurations for each model.
12
+
13
+ - `fetch-from-remote` Fetch from remote
14
+
15
+ This is consistent with the `predefined-model` configuration method. Only unified provider credentials need to be configured, and models are obtained from the provider through credential information.
16
+
17
+ These three configuration methods **can coexist**, meaning a provider can support `predefined-model` + `customizable-model`, `predefined-model` + `fetch-from-remote`, and so on. In other words, once the unified provider credentials are configured, the predefined and remotely fetched models become available, and custom models can be added and used on top of them.
18
+
19
+ ## Getting Started
20
+
21
+ Adding a new provider starts with determining the English identifier of the provider, such as `anthropic`, and using this identifier to create a `module` in `model_providers`.
22
+
23
+ Under this `module`, we first need to prepare the provider's YAML configuration.
24
+
25
+ ### Preparing Provider YAML
26
+
27
+ Here, using `Anthropic` as an example, we preset the provider's basic information, supported model types, configuration methods, and credential rules.
28
+
29
+ ```YAML
30
+ provider: anthropic # Provider identifier
31
+ label: # Provider display name, can be set in en_US English and zh_Hans Chinese, zh_Hans will default to en_US if not set.
32
+ en_US: Anthropic
33
+ icon_small: # Small provider icon, stored in the _assets directory under the corresponding provider implementation directory, same language strategy as label
34
+ en_US: icon_s_en.png
35
+ icon_large: # Large provider icon, stored in the _assets directory under the corresponding provider implementation directory, same language strategy as label
36
+ en_US: icon_l_en.png
37
+ supported_model_types: # Supported model types, Anthropic only supports LLM
38
+ - llm
39
+ configurate_methods: # Supported configuration methods, Anthropic only supports predefined models
40
+ - predefined-model
41
+ provider_credential_schema: # Provider credential rules, as Anthropic only supports predefined models, unified provider credential rules need to be defined
42
+ credential_form_schemas: # List of credential form items
43
+ - variable: anthropic_api_key # Credential parameter variable name
44
+ label: # Display name
45
+ en_US: API Key
46
+ type: secret-input # Form type, here secret-input represents an encrypted information input box, showing masked information when editing.
47
+ required: true # Whether required
48
+ placeholder: # Placeholder information
49
+ zh_Hans: Enter your API Key here
50
+ en_US: Enter your API Key
51
+ - variable: anthropic_api_url
52
+ label:
53
+ en_US: API URL
54
+ type: text-input # Form type, here text-input represents a text input box
55
+ required: false
56
+ placeholder:
57
+ zh_Hans: Enter your API URL here
58
+ en_US: Enter your API URL
59
+ ```
60
+
61
+ You can also refer to the YAML configuration information under other provider directories in `model_providers`. The complete YAML rules are available at: [Schema](schema.md#provider).
62
+
63
+ ### Implementing Provider Code
64
+
65
+ Providers need to inherit the `__base.model_provider.ModelProvider` base class and implement the `validate_provider_credentials` method for unified provider credential verification. For reference, see [AnthropicProvider](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/anthropic.py).
66
+ > If the provider is the type of `customizable-model`, there is no need to implement the `validate_provider_credentials` method.
67
+
68
+ ```python
69
+ def validate_provider_credentials(self, credentials: dict) -> None:
70
+ """
71
+ Validate provider credentials
72
+ You can choose any validate_credentials method of model type or implement validate method by yourself,
73
+ such as: get model list api
74
+
75
+ if validate failed, raise exception
76
+
77
+ :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
78
+ """
79
+ ```
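+
+ As a sketch (assuming the base class exposes `get_model_instance` and the account can access `claude-2.1`), the provider check can simply reuse the per-model credential validation:
+
+ ```python
+ def validate_provider_credentials(self, credentials: dict) -> None:
+     try:
+         model_instance = self.get_model_instance(ModelType.LLM)
+         # Reuse the per-model credential check with any predefined model.
+         model_instance.validate_credentials(model='claude-2.1', credentials=credentials)
+     except CredentialsValidateFailedError:
+         raise
+     except Exception as ex:
+         raise CredentialsValidateFailedError(str(ex))
+ ```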
80
+
81
+ Of course, you can also preliminarily reserve the implementation of `validate_provider_credentials` and directly reuse it after the model credential verification method is implemented.
82
+
83
+ ---
84
+
85
+ ### Adding Models
86
+
87
+ After the provider integration is complete, the next step is to integrate models under the provider.
88
+
89
+ First, we need to determine the type of the model to be integrated and create a `module` for the corresponding model type in the provider's directory.
90
+
91
+ The currently supported model types are as follows:
92
+
93
+ - `llm` Text generation model
94
+ - `text_embedding` Text Embedding model
95
+ - `rerank` Rerank model
96
+ - `speech2text` Speech to text
97
+ - `tts` Text to speech
98
+ - `moderation` Moderation
99
+
100
+ Continuing with `Anthropic` as an example, since `Anthropic` only supports LLM, we create a `module` named `llm` in `model_providers.anthropic`.
101
+
102
+ For predefined models, we first need to create a YAML file named after the model, such as `claude-2.1.yaml`, under the `llm` `module`.
103
+
104
+ #### Preparing Model YAML
105
+
106
+ ```yaml
107
+ model: claude-2.1 # Model identifier
108
+ # Model display name, can be set in en_US English and zh_Hans Chinese, zh_Hans will default to en_US if not set.
109
+ # Alternatively, if the label is not set, use the model identifier content.
110
+ label:
111
+ en_US: claude-2.1
112
+ model_type: llm # Model type, claude-2.1 is an LLM
113
+ features: # Supported features, agent-thought for Agent reasoning, vision for image understanding
114
+ - agent-thought
115
+ model_properties: # Model properties
116
+ mode: chat # LLM mode, complete for text completion model, chat for dialogue model
117
+ context_size: 200000 # Maximum supported context size
118
+ parameter_rules: # Model invocation parameter rules, only required for LLM
119
+ - name: temperature # Invocation parameter variable name
120
+ # Default preset with 5 variable content configuration templates: temperature/top_p/max_tokens/presence_penalty/frequency_penalty
121
+ # Directly set the template variable name in use_template, which will use the default configuration in entities.defaults.PARAMETER_RULE_TEMPLATE
122
+ # If additional configuration parameters are set, they will override the default configuration
123
+ use_template: temperature
124
+ - name: top_p
125
+ use_template: top_p
126
+ - name: top_k
127
+ label: # Invocation parameter display name
128
+ zh_Hans: 取样数量
129
+ en_US: Top k
130
+ type: int # Parameter type, supports float/int/string/boolean
131
+ help: # Help information, describing the role of the parameter
132
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
133
+ en_US: Only sample from the top K options for each subsequent token.
134
+ required: false # Whether required, can be left unset
135
+ - name: max_tokens_to_sample
136
+ use_template: max_tokens
137
+ default: 4096 # Default parameter value
138
+ min: 1 # Minimum parameter value, only applicable for float/int
139
+ max: 4096 # Maximum parameter value, only applicable for float/int
140
+ pricing: # Pricing information
141
+ input: '8.00' # Input price, i.e., Prompt price
142
+ output: '24.00' # Output price, i.e., returned content price
143
+ unit: '0.000001' # Pricing unit, i.e., the above prices are per 1M tokens
144
+ currency: USD # Currency
145
+ ```
146
+
147
+ It is recommended to prepare all model configurations before starting the implementation of the model code.
148
+
149
+ Similarly, you can also refer to the YAML configuration information for corresponding model types of other providers in the `model_providers` directory. The complete YAML rules can be found at: [Schema](schema.md#AIModel).
150
+
151
+ #### Implementing Model Invocation Code
152
+
153
+ Next, you need to create a python file named `llm.py` under the `llm` `module` to write the implementation code.
154
+
155
+ In `llm.py`, create an Anthropic LLM class, which we name `AnthropicLargeLanguageModel` (arbitrarily), inheriting the `__base.large_language_model.LargeLanguageModel` base class, and implement the following methods:
156
+
157
+ - LLM Invocation
158
+
159
+ Implement the core method for LLM invocation, which can support both streaming and synchronous returns.
160
+
161
+ ```python
162
+ def _invoke(self, model: str, credentials: dict,
163
+ prompt_messages: list[PromptMessage], model_parameters: dict,
164
+ tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
165
+ stream: bool = True, user: Optional[str] = None) \
166
+ -> Union[LLMResult, Generator]:
167
+ """
168
+ Invoke large language model
169
+
170
+ :param model: model name
171
+ :param credentials: model credentials
172
+ :param prompt_messages: prompt messages
173
+ :param model_parameters: model parameters
174
+ :param tools: tools for tool calling
175
+ :param stop: stop words
176
+ :param stream: is stream response
177
+ :param user: unique user id
178
+ :return: full response or stream response chunk generator result
179
+ """
180
+ ```
181
+
182
+ - Pre-calculating Input Tokens
183
+
184
+ If the model does not provide a pre-calculated tokens interface, you can directly return 0.
185
+
186
+ ```python
187
+ def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
188
+ tools: Optional[list[PromptMessageTool]] = None) -> int:
189
+ """
190
+ Get number of tokens for given prompt messages
191
+
192
+ :param model: model name
193
+ :param credentials: model credentials
194
+ :param prompt_messages: prompt messages
195
+ :param tools: tools for tool calling
196
+ :return:
197
+ """
198
+ ```
199
+
200
+ - Model Credential Verification
201
+
202
+ Similar to provider credential verification, this step involves verification for an individual model.
203
+
204
+ ```python
205
+ def validate_credentials(self, model: str, credentials: dict) -> None:
206
+ """
207
+ Validate model credentials
208
+
209
+ :param model: model name
210
+ :param credentials: model credentials
211
+ :return:
212
+ """
213
+ ```
214
+
215
+ - Invocation Error Mapping Table
216
+
217
+ When there is an exception in model invocation, it needs to be mapped to the `InvokeError` type specified by Runtime. This facilitates Dify's ability to handle different errors with appropriate follow-up actions.
218
+
219
+ Runtime Errors:
220
+
221
+ - `InvokeConnectionError` Invocation connection error
222
+ - `InvokeServerUnavailableError` Invocation service provider unavailable
223
+ - `InvokeRateLimitError` Invocation reached rate limit
224
+ - `InvokeAuthorizationError` Invocation authorization failure
225
+ - `InvokeBadRequestError` Invocation parameter error
226
+
227
+ ```python
228
+ @property
229
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
230
+ """
231
+ Map model invoke error to unified error
232
+ The key is the error type thrown to the caller
233
+ The value is the error type thrown by the model,
234
+ which needs to be converted into a unified error type for the caller.
235
+
236
+ :return: Invoke error mapping
237
+ """
238
+ ```
239
+
240
+ For details on the interface methods, see: [Interfaces](interfaces.md). For specific implementations, refer to: [llm.py](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/llm/llm.py).
241
+
242
+ ### Testing
243
+
244
+ To ensure the availability of integrated providers/models, each method written needs corresponding integration test code in the `tests` directory.
245
+
246
+ Continuing with `Anthropic` as an example:
247
+
248
+ Before writing test code, you need to first add the necessary credential environment variables for the test provider in `.env.example`, such as: `ANTHROPIC_API_KEY`.
249
+
250
+ Before running the tests, copy `.env.example` to `.env` and fill in the actual credential values.
251
+
252
+ #### Writing Test Code
253
+
254
+ Create a `module` with the same name as the provider in the `tests` directory: `anthropic`, and continue to create `test_provider.py` and test py files for the corresponding model types within this module, as shown below:
255
+
256
+ ```shell
257
+ .
258
+ ├── __init__.py
259
+ ├── anthropic
260
+ │   ├── __init__.py
261
+ │   ├── test_llm.py # LLM Testing
262
+ │   └── test_provider.py # Provider Testing
263
+ ```
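+
+ For example, a hedged sketch of `test_provider.py` (the import paths are assumptions based on this repository's layout; adjust them to your checkout):
+
+ ```python
+ import os
+
+ import pytest
+
+ from core.model_runtime.errors.validate import CredentialsValidateFailedError
+ from core.model_runtime.model_providers.anthropic.anthropic import AnthropicProvider
+
+
+ def test_validate_provider_credentials():
+     provider = AnthropicProvider()
+
+     # An obviously invalid key must be rejected...
+     with pytest.raises(CredentialsValidateFailedError):
+         provider.validate_provider_credentials(credentials={'anthropic_api_key': 'invalid'})
+
+     # ...while the real key loaded from .env should pass.
+     provider.validate_provider_credentials(
+         credentials={'anthropic_api_key': os.environ.get('ANTHROPIC_API_KEY')}
+     )
+ ```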
264
+
265
+ Write test code for all the various cases implemented above and submit the code after passing the tests.
api/core/model_runtime/docs/en_US/schema.md ADDED
@@ -0,0 +1,206 @@
1
+ # Configuration Rules
2
+
3
+ - Provider rules are based on the [Provider](#Provider) entity.
4
+ - Model rules are based on the [AIModelEntity](#AIModelEntity) entity.
5
+
6
+ > All entities mentioned below are based on `Pydantic BaseModel` and can be found in the `entities` module.
7
+
8
+ ### Provider
9
+
10
+ - `provider` (string) Provider identifier, e.g., `openai`
11
+ - `label` (object) Provider display name, i18n, with `en_US` English and `zh_Hans` Chinese language settings
12
+ - `zh_Hans` (string) [optional] Chinese label name, if `zh_Hans` is not set, `en_US` will be used by default.
13
+ - `en_US` (string) English label name
14
+ - `description` (object) Provider description, i18n
15
+ - `zh_Hans` (string) [optional] Chinese description
16
+ - `en_US` (string) English description
17
+ - `icon_small` (string) [optional] Small provider ICON, stored in the `_assets` directory under the corresponding provider implementation directory, with the same language strategy as `label`
18
+ - `zh_Hans` (string) Chinese ICON
19
+ - `en_US` (string) English ICON
20
+ - `icon_large` (string) [optional] Large provider ICON, stored in the `_assets` directory under the corresponding provider implementation directory, with the same language strategy as `label`
21
+ - `zh_Hans` (string) Chinese ICON
22
+ - `en_US` (string) English ICON
23
+ - `background` (string) [optional] Background color value, e.g., #FFFFFF, if empty, the default frontend color value will be displayed.
24
+ - `help` (object) [optional] help information
25
+ - `title` (object) help title, i18n
26
+ - `zh_Hans` (string) [optional] Chinese title
27
+ - `en_US` (string) English title
28
+ - `url` (object) help link, i18n
29
+ - `zh_Hans` (string) [optional] Chinese link
30
+ - `en_US` (string) English link
31
+ - `supported_model_types` (array[[ModelType](#ModelType)]) Supported model types
32
+ - `configurate_methods` (array[[ConfigurateMethod](#ConfigurateMethod)]) Configuration methods
33
+ - `provider_credential_schema` ([ProviderCredentialSchema](#ProviderCredentialSchema)) Provider credential specification
34
+ - `model_credential_schema` ([ModelCredentialSchema](#ModelCredentialSchema)) Model credential specification
35
+
36
+ ### AIModelEntity
37
+
38
+ - `model` (string) Model identifier, e.g., `gpt-3.5-turbo`
39
+ - `label` (object) [optional] Model display name, i18n, with `en_US` English and `zh_Hans` Chinese language settings
40
+ - `zh_Hans` (string) [optional] Chinese label name
41
+ - `en_US` (string) English label name
42
+ - `model_type` ([ModelType](#ModelType)) Model type
43
+ - `features` (array[[ModelFeature](#ModelFeature)]) [optional] Supported feature list
44
+ - `model_properties` (object) Model properties
45
+ - `mode` ([LLMMode](#LLMMode)) Mode (available for model type `llm`)
46
+ - `context_size` (int) Context size (available for model types `llm`, `text-embedding`)
47
+ - `max_chunks` (int) Maximum number of chunks (available for model types `text-embedding`, `moderation`)
48
+ - `file_upload_limit` (int) Maximum file upload limit, in MB (available for model type `speech2text`)
49
+ - `supported_file_extensions` (string) Supported file extension formats, e.g., mp3, mp4 (available for model type `speech2text`)
50
+ - `default_voice` (string) Default voice, e.g.: alloy, echo, fable, onyx, nova, shimmer (available for model type `tts`)
51
+ - `voices` (list) List of available voices (available for model type `tts`)
52
+ - `mode` (string) Voice identifier of an available voice (available for model type `tts`)
53
+ - `name` (string) Display name of an available voice (available for model type `tts`)
54
+ - `language` (string) Languages supported by the voice (available for model type `tts`)
55
+ - `word_limit` (int) Word limit for a single conversion, defaults to paragraph-wise splitting (available for model type `tts`)
56
+ - `audio_type` (string) Supported audio file extensions, e.g.: mp3, wav (available for model type `tts`)
57
+ - `max_workers` (int) Number of concurrent workers for text-to-audio conversion (available for model type `tts`)
58
+ - `max_characters_per_chunk` (int) Maximum characters per chunk (available for model type `moderation`)
59
+ - `parameter_rules` (array[[ParameterRule](#ParameterRule)]) [optional] Model invocation parameter rules
60
+ - `pricing` ([PriceConfig](#PriceConfig)) [optional] Pricing information
61
+ - `deprecated` (bool) Whether deprecated. If deprecated, the model will no longer be displayed in the list, but those already configured can continue to be used. Default False.
62
+
63
+ ### ModelType
64
+
65
+ - `llm` Text generation model
66
+ - `text-embedding` Text Embedding model
67
+ - `rerank` Rerank model
68
+ - `speech2text` Speech to text
69
+ - `tts` Text to speech
70
+ - `moderation` Moderation
71
+
72
+ ### ConfigurateMethod
73
+
74
+ - `predefined-model` Predefined model
75
+
76
+ Indicates that users can use the predefined models under the provider by configuring the unified provider credentials.
77
+ - `customizable-model` Customizable model
78
+
79
+ Users need to add credential configuration for each model.
80
+
81
+ - `fetch-from-remote` Fetch from remote
82
+
83
+ Consistent with the `predefined-model` configuration method, only unified provider credentials need to be configured, and models are obtained from the provider through credential information.
84
+
85
+ ### ModelFeature
86
+
87
+ - `agent-thought` Agent reasoning; generally, models above 70B parameters have chain-of-thought capability.
88
+ - `vision` Vision, i.e., image understanding.
89
+ - `tool-call` Tool calling
90
+ - `multi-tool-call` Multiple tool calls in one response
91
+ - `stream-tool-call` Tool calling in streaming responses
92
+
93
+ ### FetchFrom
94
+
95
+ - `predefined-model` Predefined model
96
+ - `fetch-from-remote` Remote model
97
+
98
+ ### LLMMode
99
+
100
+ - `complete` Text completion
101
+ - `chat` Dialogue
102
+
103
+ ### ParameterRule
104
+
105
+ - `name` (string) Actual model invocation parameter name
106
+ - `use_template` (string) [optional] Using template
107
+
108
+ By default, 5 variable content configuration templates are preset:
109
+
110
+ - `temperature`
111
+ - `top_p`
112
+ - `frequency_penalty`
113
+ - `presence_penalty`
114
+ - `max_tokens`
115
+
116
+ Set the template variable name directly in `use_template` to adopt the default configuration from `entities.defaults.PARAMETER_RULE_TEMPLATE`;
117
+ no parameters other than `name` and `use_template` need to be set. If additional configuration parameters are set, they override the default configuration.
118
+ Refer to `openai/llm/gpt-3.5-turbo.yaml`.
119
+
120
+ - `label` (object) [optional] Label, i18n
121
+
122
+ - `zh_Hans`(string) [optional] Chinese label name
123
+ - `en_US` (string) English label name
124
+
125
+ - `type`(string) [optional] Parameter type
126
+
127
+ - `int` Integer
128
+ - `float` Float
129
+ - `string` String
130
+ - `boolean` Boolean
131
+
132
+ - `help` (string) [optional] Help information
133
+
134
+ - `zh_Hans` (string) [optional] Chinese help information
135
+ - `en_US` (string) English help information
136
+
137
+ - `required` (bool) Required, default False.
138
+
139
+ - `default`(int/float/string/bool) [optional] Default value
140
+
141
+ - `min`(int/float) [optional] Minimum value, applicable only to numeric types
142
+
143
+ - `max`(int/float) [optional] Maximum value, applicable only to numeric types
144
+
145
+ - `precision`(int) [optional] Precision, number of decimal places to keep, applicable only to numeric types
146
+
147
+ - `options` (array[string]) [optional] Dropdown option values, applicable only when `type` is `string`, if not set or null, option values are not restricted
148
+
149
+ ### PriceConfig
150
+
151
+ - `input` (float) Input price, i.e., Prompt price
152
+ - `output` (float) Output price, i.e., returned content price
153
+ - `unit` (float) Pricing unit, e.g., if prices are quoted per 1M tokens, `unit` is `0.000001` (for example, 1,000 prompt tokens at `input: 8.00` cost 8.00 × 0.000001 × 1,000 = 0.008).
154
+ - `currency` (string) Currency unit
155
+
156
+ ### ProviderCredentialSchema
157
+
158
+ - `credential_form_schemas` (array[[CredentialFormSchema](#CredentialFormSchema)]) Credential form standard
159
+
160
+ ### ModelCredentialSchema
161
+
162
+ - `model` (object) Model identifier, variable name defaults to `model`
163
+ - `label` (object) Model form item display name
164
+ - `en_US` (string) English
165
+ - `zh_Hans`(string) [optional] Chinese
166
+ - `placeholder` (object) Model prompt content
167
+ - `en_US`(string) English
168
+ - `zh_Hans`(string) [optional] Chinese
169
+ - `credential_form_schemas` (array[[CredentialFormSchema](#CredentialFormSchema)]) Credential form standard
170
+
171
+ ### CredentialFormSchema
172
+
173
+ - `variable` (string) Form item variable name
174
+ - `label` (object) Form item label name
175
+ - `en_US`(string) English
176
+ - `zh_Hans` (string) [optional] Chinese
177
+ - `type` ([FormType](#FormType)) Form item type
178
+ - `required` (bool) Whether required
179
+ - `default`(string) Default value
180
+ - `options` (array[[FormOption](#FormOption)]) Specific property of form items of type `select` or `radio`, defining dropdown content
181
+ - `placeholder`(object) Specific property of form items of type `text-input`, placeholder content
182
+ - `en_US`(string) English
183
+ - `zh_Hans` (string) [optional] Chinese
184
+ - `max_length` (int) Specific property of form items of type `text-input`, defining maximum input length, 0 for no limit.
185
+ - `show_on` (array[[FormShowOnObject](#FormShowOnObject)]) Displayed only when the values of other form items meet the conditions; always displayed if empty (see the sketch below).
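+
+ For example, a sketch of `show_on` usage (the variable names here are illustrative, not taken from any real provider):
+
+ ```yaml
+ credential_form_schemas:
+   - variable: api_endpoint_type
+     label:
+       en_US: Endpoint Type
+     type: select
+     required: true
+     options:
+       - label:
+           en_US: Official
+         value: official
+       - label:
+           en_US: Custom
+         value: custom
+   - variable: endpoint_url
+     label:
+       en_US: Endpoint URL
+     type: text-input
+     required: false
+     show_on: # only displayed when "Custom" is selected above
+       - variable: api_endpoint_type
+         value: custom
+ ```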
186
+
187
+ ### FormType
188
+
189
+ - `text-input` Text input component
190
+ - `secret-input` Password input component
191
+ - `select` Single-choice dropdown
192
+ - `radio` Radio component
193
+ - `switch` Switch component, only supports `true` and `false` values
194
+
195
+ ### FormOption
196
+
197
+ - `label` (object) Label
198
+ - `en_US`(string) English
199
+ - `zh_Hans`(string) [optional] Chinese
200
+ - `value` (string) Dropdown option value
201
+ - `show_on` (array[[FormShowOnObject](#FormShowOnObject)]) Displayed only when the values of other form items meet the conditions; always displayed if empty.
202
+
203
+ ### FormShowOnObject
204
+
205
+ - `variable` (string) Variable name of other form items
206
+ - `value` (string) Variable value of other form items
api/core/model_runtime/docs/zh_Hans/customizable_model_scale_out.md ADDED
@@ -0,0 +1,297 @@
1
+ ## 自定义预定义模型接入
2
+
3
+ ### 介绍
4
+
5
+ 供应商集成完成后,接下来为供应商下模型的接入,为了帮助理解整个接入过程,我们以`Xinference`为例,逐步完成一个完整的供应商接入。
6
+
7
+ 需要注意的是,对于自定义模型,每一个模型的接入都需要填写一个完整的供应商凭据。
8
+
9
+ 而不同于预定义模型,自定义供应商接入时永远会拥有如下两个参数,不需要在供应商yaml中定义。
10
+
11
+ ![Alt text](images/index/image-3.png)
12
+
13
+
14
+ 在前文中,我们已经知道了供应商无需实现 `validate_provider_credentials`,Runtime 会自行根据用户在此选择的模型类型和模型名称,调用对应模型层的 `validate_credentials` 进行验证。
15
+
16
+ ### 编写供应商yaml
17
+
18
+ 我们首先要确定,接入的这个供应商支持哪些类型的模型。
19
+
20
+ 当前支持模型类型如下:
21
+
22
+ - `llm` 文本生成模型
23
+ - `text_embedding` 文本 Embedding 模型
24
+ - `rerank` Rerank 模型
25
+ - `speech2text` 语音转文字
26
+ - `tts` 文字转语音
27
+ - `moderation` 审查
28
+
29
+ `Xinference` 支持 `LLM`、`Text Embedding` 和 `Rerank`,那么我们开始编写 `xinference.yaml`。
30
+
31
+ ```yaml
32
+ provider: xinference #确定供应商标识
33
+ label: # 供应商展示名称,可设置 en_US 英文、zh_Hans 中文两种语言,zh_Hans 不设置将默认使用 en_US。
34
+ en_US: Xorbits Inference
35
+ icon_small: # 小图标,可以参考其他供应商的图标,存储在对应供应商实现目录下的 _assets 目录,中英文策略同 label
36
+ en_US: icon_s_en.svg
37
+ icon_large: # 大图标
38
+ en_US: icon_l_en.svg
39
+ help: # 帮助
40
+ title:
41
+ en_US: How to deploy Xinference
42
+ zh_Hans: 如何部署 Xinference
43
+ url:
44
+ en_US: https://github.com/xorbitsai/inference
45
+ supported_model_types: # 支持的模型类型,Xinference同时支持LLM/Text Embedding/Rerank
46
+ - llm
47
+ - text-embedding
48
+ - rerank
49
+ configurate_methods: # 因为Xinference为本地部署的供应商,并且没有预定义模型,需要用什么模型需要根据Xinference的文档自己部署,所以这里只支持自定义模型
50
+ - customizable-model
51
+ provider_credential_schema:
52
+ credential_form_schemas:
53
+ ```
54
+
55
+ 随后,我们需要思考在 Xinference 中定义一个模型需要哪些凭据。
56
+
57
+ - Xinference 支持三种不同类型的模型,因此我们需要 `model_type` 来指定模型的类型,可以这样编写:
58
+ ```yaml
59
+ provider_credential_schema:
60
+ credential_form_schemas:
61
+ - variable: model_type
62
+ type: select
63
+ label:
64
+ en_US: Model type
65
+ zh_Hans: 模型类型
66
+ required: true
67
+ options:
68
+ - value: text-generation
69
+ label:
70
+ en_US: Language Model
71
+ zh_Hans: 语言模型
72
+ - value: embeddings
73
+ label:
74
+ en_US: Text Embedding
75
+ - value: reranking
76
+ label:
77
+ en_US: Rerank
78
+ ```
79
+ - 每一个模型都有自己的名称`model_name`,因此需要在这里定义
80
+ ```yaml
81
+ - variable: model_name
82
+ type: text-input
83
+ label:
84
+ en_US: Model name
85
+ zh_Hans: 模型名称
86
+ required: true
87
+ placeholder:
88
+ zh_Hans: 填写模型名称
89
+ en_US: Input model name
90
+ ```
91
+ - 填写Xinference本地部署的地址
92
+ ```yaml
93
+ - variable: server_url
94
+ label:
95
+ zh_Hans: 服务器URL
96
+ en_US: Server url
97
+ type: text-input
98
+ required: true
99
+ placeholder:
100
+ zh_Hans: 在此输入Xinference的服务器地址,如 https://example.com/xxx
101
+ en_US: Enter the url of your Xinference, for example https://example.com/xxx
102
+ ```
103
+ - 每个模型都有唯一的model_uid,因此需要在这里定义
104
+ ```yaml
105
+ - variable: model_uid
106
+ label:
107
+ zh_Hans: 模型UID
108
+ en_US: Model uid
109
+ type: text-input
110
+ required: true
111
+ placeholder:
112
+ zh_Hans: 在此输入您的Model UID
113
+ en_US: Enter the model uid
114
+ ```
115
+ 现在,我们就完成了供应商的基础定义。
116
+
117
+ ### 编写模型代码
118
+
119
+ 然后我们以 `llm` 类型为例,编写 `xinference/llm/llm.py`。
120
+
121
+ 在 `llm.py` 中创建一个 Xinference LLM 类,我们取名为 `XinferenceAILargeLanguageModel`(随意),继承 `__base.large_language_model.LargeLanguageModel` 基类,实现以下几个方法:
122
+
123
+ - LLM 调用
124
+
125
+ 实现 LLM 调用的核心方法,可同时支持流式和同步返回。
126
+
127
+ ```python
128
+ def _invoke(self, model: str, credentials: dict,
129
+ prompt_messages: list[PromptMessage], model_parameters: dict,
130
+ tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
131
+ stream: bool = True, user: Optional[str] = None) \
132
+ -> Union[LLMResult, Generator]:
133
+ """
134
+ Invoke large language model
135
+
136
+ :param model: model name
137
+ :param credentials: model credentials
138
+ :param prompt_messages: prompt messages
139
+ :param model_parameters: model parameters
140
+ :param tools: tools for tool calling
141
+ :param stop: stop words
142
+ :param stream: is stream response
143
+ :param user: unique user id
144
+ :return: full response or stream response chunk generator result
145
+ """
146
+ ```
147
+
148
+ 在实现时,需要注意使用两个函数来返回数据,分别用于处理同步返回和流式返回,因为Python会将函数中包含 `yield` 关键字的函数识别为生成器函数,返回的数据类型固定为 `Generator`,因此同步和流式返回需要分别实现,就像下面这样(注意下面例子使用了简化参数,实际实现时需要按照上面的参数列表进行实现):
149
+
150
+ ```python
151
+ def _invoke(self, stream: bool, **kwargs) \
152
+ -> Union[LLMResult, Generator]:
153
+ if stream:
154
+ return self._handle_stream_response(**kwargs)
155
+ return self._handle_sync_response(**kwargs)
156
+
157
+ def _handle_stream_response(self, **kwargs) -> Generator:
158
+ for chunk in response:
159
+ yield chunk
160
+ def _handle_sync_response(self, **kwargs) -> LLMResult:
161
+ return LLMResult(**response)
162
+ ```
163
+
164
+ - 预计算输入 tokens
165
+
166
+ 若模型未提供预计算 tokens 接口,可直接返回 0。
167
+
168
+ ```python
169
+ def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
170
+ tools: Optional[list[PromptMessageTool]] = None) -> int:
171
+ """
172
+ Get number of tokens for given prompt messages
173
+
174
+ :param model: model name
175
+ :param credentials: model credentials
176
+ :param prompt_messages: prompt messages
177
+ :param tools: tools for tool calling
178
+ :return:
179
+ """
180
+ ```
181
+
182
+ 若不希望直接返回 0,也可以使用 `self._get_num_tokens_by_gpt2(text: str)` 来估算 tokens。该方法位于 `AIModel` 基类中,使用 GPT2 的 Tokenizer 进行计算;但它只是一种替代方案,结果并不完全准确。
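+
+ 例如,一个基于该方法的粗略估算示意(假设所有消息内容均可转换为字符串):
+
+ ```python
+ def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
+                    tools: Optional[list[PromptMessageTool]] = None) -> int:
+     # 粗略估算:拼接所有消息内容后,用 AIModel 基类提供的 GPT2 Tokenizer 计数,结果仅供参考
+     text = " ".join(str(message.content) for message in prompt_messages)
+     return self._get_num_tokens_by_gpt2(text)
+ ```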
183
+
184
+ - 模型凭据校验
185
+
186
+ 与供应商凭据校验类似,这里针对单个模型进行校验。
187
+
188
+ ```python
189
+ def validate_credentials(self, model: str, credentials: dict) -> None:
190
+ """
191
+ Validate model credentials
192
+
193
+ :param model: model name
194
+ :param credentials: model credentials
195
+ :return:
196
+ """
197
+ ```
198
+
199
+ - 模型参数Schema
200
+
201
+ 与预定义模型不同,由于没有在 yaml 文件中定义模型支持哪些参数,我们需要动态生成模型参数的 Schema。
202
+
203
+ 如Xinference支持`max_tokens` `temperature` `top_p` 这三个模型参数。
204
+
205
+ 但是有的供应商根据不同的模型支持不同的参数,如供应商`OpenLLM`支持`top_k`,但是并不是这个供应商提供的所有模型都支持`top_k`,我们这里举例A模型支持`top_k`,B模型不支持`top_k`,那么我们需要在这里动态生成模型参数的Schema,如下所示:
206
+
207
+ ```python
208
+ def get_customizable_model_schema(self, model: str, credentials: dict) -> Optional[AIModelEntity]:
209
+ """
210
+ used to define customizable model schema
211
+ """
212
+ rules = [
213
+ ParameterRule(
214
+ name='temperature', type=ParameterType.FLOAT,
215
+ use_template='temperature',
216
+ label=I18nObject(
217
+ zh_Hans='温度', en_US='Temperature'
218
+ )
219
+ ),
220
+ ParameterRule(
221
+ name='top_p', type=ParameterType.FLOAT,
222
+ use_template='top_p',
223
+ label=I18nObject(
224
+ zh_Hans='Top P', en_US='Top P'
225
+ )
226
+ ),
227
+ ParameterRule(
228
+ name='max_tokens', type=ParameterType.INT,
229
+ use_template='max_tokens',
230
+ min=1,
231
+ default=512,
232
+ label=I18nObject(
233
+ zh_Hans='最大生成长度', en_US='Max Tokens'
234
+ )
235
+ )
236
+ ]
237
+
238
+ # if model is A, add top_k to rules
239
+ if model == 'A':
240
+ rules.append(
241
+ ParameterRule(
242
+ name='top_k', type=ParameterType.INT,
243
+ use_template='top_k',
244
+ min=1,
245
+ default=50,
246
+ label=I18nObject(
247
+ zh_Hans='Top K', en_US='Top K'
248
+ )
249
+ )
250
+ )
251
+
252
+ """
253
+ some NOT IMPORTANT code here
254
+ """
255
+
256
+ entity = AIModelEntity(
257
+ model=model,
258
+ label=I18nObject(
259
+ en_US=model
260
+ ),
261
+ fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
262
+ model_type=model_type,
263
+ model_properties={
264
+ ModelPropertyKey.MODE: LLMMode.CHAT.value,  # 此处假设为 chat 模式;取值应为 LLMMode 定义的 chat/completion
265
+ },
266
+ parameter_rules=rules
267
+ )
268
+
269
+ return entity
270
+ ```
271
+
272
+ - 调用异常错误映射表
273
+
274
+ 当模型调用异常时需要映射到 Runtime 指定的 `InvokeError` 类型,方便 Dify 针对不同错误做不同后续处理。
275
+
276
+ Runtime Errors:
277
+
278
+ - `InvokeConnectionError` 调用连接错误
279
+ - `InvokeServerUnavailableError` 调用服务方不可用
280
+ - `InvokeRateLimitError` 调用达到限额
281
+ - `InvokeAuthorizationError` 调用鉴权失败
282
+ - `InvokeBadRequestError` 调用传参有误
283
+
284
+ ```python
285
+ @property
286
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
287
+ """
288
+ Map model invoke error to unified error
289
+ The key is the error type thrown to the caller
290
+ The value is the error type thrown by the model,
291
+ which needs to be converted into a unified error type for the caller.
292
+
293
+ :return: Invoke error mapping
294
+ """
295
+ ```
296
+
297
+ 接口方法说明见:[Interfaces](./interfaces.md),具体实现可参考:[llm.py](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/llm/llm.py)。
api/core/model_runtime/docs/zh_Hans/images/index/image-1.png ADDED

Git LFS Details

  • SHA256: aa7b8b17348097b21e33016b526285c74549411ac1f06f3b679acb1a0a94ed9f
  • Pointer size: 131 Bytes
  • Size of remote file: 235 kB
api/core/model_runtime/docs/zh_Hans/images/index/image-2.png ADDED

Git LFS Details

  • SHA256: f90954338d7931b5e3d72cab0b10750f25767a10c08defae3e521cb3c82decff
  • Pointer size: 131 Bytes
  • Size of remote file: 210 kB
api/core/model_runtime/docs/zh_Hans/images/index/image-20231210143654461.png ADDED

Git LFS Details

  • SHA256: 5dce42845fe8087a9430ce99119ceb4ad466fc2dace3e142209f7265abbc4470
  • Pointer size: 131 Bytes
  • Size of remote file: 394 kB
api/core/model_runtime/docs/zh_Hans/images/index/image-20231210144229650.png ADDED

Git LFS Details

  • SHA256: fe26d95f692868822fee5601a6f7163cbd5da01c7e6436c5e8dd55a8ea1e9f75
  • Pointer size: 131 Bytes
  • Size of remote file: 115 kB
api/core/model_runtime/docs/zh_Hans/images/index/image-20231210144814617.png ADDED

Git LFS Details

  • SHA256: 1fd647063c5900fc541bfed8477a7122acbc46d6a296c0e84fb49dea984733b3
  • Pointer size: 131 Bytes
  • Size of remote file: 111 kB
api/core/model_runtime/docs/zh_Hans/images/index/image-20231210151548521.png ADDED
api/core/model_runtime/docs/zh_Hans/images/index/image-20231210151628992.png ADDED
api/core/model_runtime/docs/zh_Hans/images/index/image-20231210165243632.png ADDED

Git LFS Details

  • SHA256: dc4ae12b6d85610e2e70a07dfc20f9877d752490d706b0fa3682dfb211a7d844
  • Pointer size: 131 Bytes
  • Size of remote file: 554 kB
api/core/model_runtime/docs/zh_Hans/images/index/image-3.png ADDED
api/core/model_runtime/docs/zh_Hans/images/index/image.png ADDED

Git LFS Details

  • SHA256: 6008899b8b7b57af7c7e83509a6116dbc3f2c6cf040ae67eb1a1ae1fb78c3ede
  • Pointer size: 131 Bytes
  • Size of remote file: 268 kB
api/core/model_runtime/docs/zh_Hans/interfaces.md ADDED
@@ -0,0 +1,746 @@
1
+ # 接口方法
2
+
3
+ 这里介绍供应商和各模型类型需要实现的接口方法和参数说明。
4
+
5
+ ## 供应商
6
+
7
+ 继承 `__base.model_provider.ModelProvider` 基类,实现以下接口:
8
+
9
+ ```python
10
+ def validate_provider_credentials(self, credentials: dict) -> None:
11
+ """
12
+ Validate provider credentials
13
+ You can choose any validate_credentials method of model type or implement validate method by yourself,
14
+ such as: get model list api
15
+
16
+ if validate failed, raise exception
17
+
18
+ :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
19
+ """
20
+ ```
21
+
22
+ - `credentials` (object) 凭据信息
23
+
24
+ 凭据信息的参数由供应商 YAML 配置文件的 `provider_credential_schema` 定义,传入如:`api_key` 等。
25
+
26
+ 验证失败请抛出 `errors.validate.CredentialsValidateFailedError` 错误。
27
+
28
+ **注:预定义模型需完整实现该接口,自定义模型供应商只需要如下简单实现即可**
29
+
30
+ ```python
31
+ class XinferenceProvider(Provider):
32
+ def validate_provider_credentials(self, credentials: dict) -> None:
33
+ pass
34
+ ```
35
+
36
+ ## 模型
37
+
38
+ 模型分为 5 种不同的模型类型,不同模型类型继承的基类不同,需要实现的方法也不同。
39
+
40
+ ### 通用接口
41
+
42
+ 所有模型均需要统一实现下面 2 个方法:
43
+
44
+ - 模型凭据校验
45
+
46
+ 与供应商凭据校验类似,这里针对单个模型进行校验。
47
+
48
+ ```python
49
+ def validate_credentials(self, model: str, credentials: dict) -> None:
50
+ """
51
+ Validate model credentials
52
+
53
+ :param model: model name
54
+ :param credentials: model credentials
55
+ :return:
56
+ """
57
+ ```
58
+
59
+ 参数:
60
+
61
+ - `model` (string) 模型名称
62
+
63
+ - `credentials` (object) 凭据信息
64
+
65
+ 凭据信息的参数由供应商 YAML 配置文件的 `provider_credential_schema` 或 `model_credential_schema` 定义,传入如:`api_key` 等。
66
+
67
+ 验证失败请抛出 `errors.validate.CredentialsValidateFailedError` 错误。
68
+
69
+ - 调用异常错误映射表
70
+
71
+ 当模型调用异常时需要映射到 Runtime 指定的 `InvokeError` 类型,方便 Dify 针对不同错误做不同后续处理。
72
+
73
+ Runtime Errors:
74
+
75
+ - `InvokeConnectionError` 调用连接错误
76
+ - `InvokeServerUnavailableError` 调用服务方不可用
77
+ - `InvokeRateLimitError` 调用达到限额
78
+ - `InvokeAuthorizationError` 调用鉴权失败
79
+ - `InvokeBadRequestError` 调用传参有误
80
+
81
+ ```python
82
+ @property
83
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
84
+ """
85
+ Map model invoke error to unified error
86
+ The key is the error type thrown to the caller
87
+ The value is the error type thrown by the model,
88
+ which needs to be converted into a unified error type for the caller.
89
+
90
+ :return: Invoke error mapping
91
+ """
92
+ ```
93
+
94
+ 也可以直接抛出对应 Errors,并做如下定义,这样在之后的调用中可以直接抛出`InvokeConnectionError`等异常。
95
+
96
+ ```python
97
+ @property
98
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
99
+ return {
100
+ InvokeConnectionError: [
101
+ InvokeConnectionError
102
+ ],
103
+ InvokeServerUnavailableError: [
104
+ InvokeServerUnavailableError
105
+ ],
106
+ InvokeRateLimitError: [
107
+ InvokeRateLimitError
108
+ ],
109
+ InvokeAuthorizationError: [
110
+ InvokeAuthorizationError
111
+ ],
112
+ InvokeBadRequestError: [
113
+ InvokeBadRequestError
114
+ ],
115
+ }
116
+ ```
117
+
118
+ ​ 可参考 OpenAI `_invoke_error_mapping`。
119
+
120
+ ### LLM
121
+
122
+ 继承 `__base.large_language_model.LargeLanguageModel` 基类,实现以下接口:
123
+
124
+ - LLM 调用
125
+
126
+ 实现 LLM 调用的核心方法,可同时支持流式和同步返回。
127
+
128
+ ```python
129
+ def _invoke(self, model: str, credentials: dict,
130
+ prompt_messages: list[PromptMessage], model_parameters: dict,
131
+ tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
132
+ stream: bool = True, user: Optional[str] = None) \
133
+ -> Union[LLMResult, Generator]:
134
+ """
135
+ Invoke large language model
136
+
137
+ :param model: model name
138
+ :param credentials: model credentials
139
+ :param prompt_messages: prompt messages
140
+ :param model_parameters: model parameters
141
+ :param tools: tools for tool calling
142
+ :param stop: stop words
143
+ :param stream: is stream response
144
+ :param user: unique user id
145
+ :return: full response or stream response chunk generator result
146
+ """
147
+ ```
148
+
149
+ - 参数:
150
+
151
+ - `model` (string) 模型名称
152
+
153
+ - `credentials` (object) 凭据信息
154
+
155
+ 凭据信息的参数由供应商 YAML 配置文件的 `provider_credential_schema` 或 `model_credential_schema` 定义,传入如:`api_key` 等。
156
+
157
+ - `prompt_messages` (array[[PromptMessage](#PromptMessage)]) Prompt 列表
158
+
159
+ 若模型为 `Completion` 类型,则列表只需要传入一个 [UserPromptMessage](#UserPromptMessage) 元素即可;
160
+
161
+ 若模型为 `Chat` 类型,需要根据消息不同传入 [SystemPromptMessage](#SystemPromptMessage), [UserPromptMessage](#UserPromptMessage), [AssistantPromptMessage](#AssistantPromptMessage), [ToolPromptMessage](#ToolPromptMessage) 元素列表
162
+
163
+ - `model_parameters` (object) 模型参数
164
+
165
+ 模型参数由模型 YAML 配置的 `parameter_rules` 定义。
166
+
167
+ - `tools` (array[[PromptMessageTool](#PromptMessageTool)]) [optional] 工具列表,等同于 `function calling` 中的 `function`。
168
+
169
+ 即传入 tool calling 的工具列表。
170
+
171
+ - `stop` (array[string]) [optional] 停止序列
172
+
173
+ 模型返回将在停止序列定义的字符串之前停止输出。
174
+
175
+ - `stream` (bool) 是否流式输出,默认 True
176
+
177
+ 流式输出返回 Generator[[LLMResultChunk](#LLMResultChunk)],非流式输出返回 [LLMResult](#LLMResult)。
178
+
179
+ - `user` (string) [optional] 用户的唯一标识符
180
+
181
+ 可以帮助供应商监控和检测滥用行为。
182
+
183
+ - 返回
184
+
185
+ 流式输出返回 Generator[[LLMResultChunk](#LLMResultChunk)],非流式输出返回 [LLMResult](#LLMResult)。
186
+
187
+ - 预计算输入 tokens
188
+
189
+ 若模型未提供预计算 tokens 接口,可直接返回 0。
190
+
191
+ ```python
192
+ def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
193
+ tools: Optional[list[PromptMessageTool]] = None) -> int:
194
+ """
195
+ Get number of tokens for given prompt messages
196
+
197
+ :param model: model name
198
+ :param credentials: model credentials
199
+ :param prompt_messages: prompt messages
200
+ :param tools: tools for tool calling
201
+ :return:
202
+ """
203
+ ```
204
+
205
+ 参数说明见上述 `LLM 调用`。
206
+
207
+ 该接口需要根据对应`model`选择合适的`tokenizer`进行计算,如果对应模型没有提供`tokenizer`,可以使用`AIModel`基类中的`_get_num_tokens_by_gpt2(text: str)`方法进行计算。
208
+
209
+ - 获取自定义模型规则 [可选]
210
+
211
+ ```python
212
+ def get_customizable_model_schema(self, model: str, credentials: dict) -> Optional[AIModelEntity]:
213
+ """
214
+ Get customizable model schema
215
+
216
+ :param model: model name
217
+ :param credentials: model credentials
218
+ :return: model schema
219
+ """
220
+ ```
221
+
222
+ ​当供应商支持增加自定义 LLM 时,可实现此方法让自定义模型可获取模型规则,默认返回 None。
223
+
224
+ 对于 `OpenAI` 供应商下的大部分微调模型,可以通过其微调模型名称获取到其基类模型,如 `gpt-3.5-turbo-1106`,然后返回基类模型的预定义参数规则,可参考 [openai](https://github.com/langgenius/dify/blob/feat/model-runtime/api/core/model_runtime/model_providers/openai/llm/llm.py#L801) 的具体实现。
226
+
227
+ ### TextEmbedding
228
+
229
+ 继承 `__base.text_embedding_model.TextEmbeddingModel` 基类,实现以下接口:
230
+
231
+ - Embedding 调用
232
+
233
+ ```python
234
+ def _invoke(self, model: str, credentials: dict,
235
+ texts: list[str], user: Optional[str] = None) \
236
+ -> TextEmbeddingResult:
237
+ """
238
+ Invoke text embedding model
239
+
240
+ :param model: model name
241
+ :param credentials: model credentials
242
+ :param texts: texts to embed
243
+ :param user: unique user id
244
+ :return: embeddings result
245
+ """
246
+ ```
247
+
248
+ - 参数:
249
+
250
+ - `model` (string) 模型名称
251
+
252
+ - `credentials` (object) 凭据信息
253
+
254
+ 凭据信息的参数由供应商 YAML 配置文件的 `provider_credential_schema` 或 `model_credential_schema` 定义,传入如:`api_key` 等。
255
+
256
+ - `texts` (array[string]) 文本列表,可批量处理
257
+
258
+ - `user` (string) [optional] 用户的唯一标识符
259
+
260
+ 可以帮助供应商监控和检测滥用行为。
261
+
262
+ - 返回:
263
+
264
+ [TextEmbeddingResult](#TextEmbeddingResult) 实体。
265
+
266
+ - 预计算 tokens
267
+
268
+ ```python
269
+ def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
270
+ """
271
+ Get number of tokens for given prompt messages
272
+
273
+ :param model: model name
274
+ :param credentials: model credentials
275
+ :param texts: texts to embed
276
+ :return:
277
+ """
278
+ ```
279
+
280
+ 参数说明见上述 `Embedding 调用`。
281
+
282
+ 同上述`LargeLanguageModel`,该接口需要根据对应`model`选择合适的`tokenizer`进行计算,如果对应模型没有提供`tokenizer`,可以使用`AIModel`基类中的`_get_num_tokens_by_gpt2(text: str)`方法进行计算。
283
+
284
+ ### Rerank
285
+
286
+ 继承 `__base.rerank_model.RerankModel` 基类,实现以下接口:
287
+
288
+ - rerank 调用
289
+
290
+ ```python
291
+ def _invoke(self, model: str, credentials: dict,
292
+ query: str, docs: list[str], score_threshold: Optional[float] = None, top_n: Optional[int] = None,
293
+ user: Optional[str] = None) \
294
+ -> RerankResult:
295
+ """
296
+ Invoke rerank model
297
+
298
+ :param model: model name
299
+ :param credentials: model credentials
300
+ :param query: search query
301
+ :param docs: docs for reranking
302
+ :param score_threshold: score threshold
303
+ :param top_n: top n
304
+ :param user: unique user id
305
+ :return: rerank result
306
+ """
307
+ ```
308
+
309
+ - 参数:
310
+
311
+ - `model` (string) 模型名称
312
+
313
+ - `credentials` (object) 凭据信息
314
+
315
+ 凭据信息的参数由供应商 YAML 配置文件的 `provider_credential_schema` 或 `model_credential_schema` 定义,传入如:`api_key` 等。
316
+
317
+ - `query` (string) 查询请求内容
318
+
319
+ - `docs` (array[string]) 需要重排的分段列表
320
+
321
+ - `score_threshold` (float) [optional] Score 阈值
322
+
323
+ - `top_n` (int) [optional] 取前 n 个分段
324
+
325
+ - `user` (string) [optional] 用户的唯一标识符
326
+
327
+ 可以帮助供应商监控和检测滥用行为。
328
+
329
+ - 返回:
330
+
331
+ [RerankResult](#RerankResult) 实体。
332
+
333
+ ### Speech2text
334
+
335
+ 继承 `__base.speech2text_model.Speech2TextModel` 基类,实现以下接口:
336
+
337
+ - Invoke 调用
338
+
339
+ ```python
340
+ def _invoke(self, model: str, credentials: dict,
341
+ file: IO[bytes], user: Optional[str] = None) \
342
+ -> str:
343
+ """
344
+ Invoke speech-to-text model
345
+
346
+ :param model: model name
347
+ :param credentials: model credentials
348
+ :param file: audio file
349
+ :param user: unique user id
350
+ :return: text for given audio file
351
+ """
352
+ ```
353
+
354
+ - 参数:
355
+
356
+ - `model` (string) 模型名称
357
+
358
+ - `credentials` (object) 凭据信息
359
+
360
+ 凭据信息的参数由供应商 YAML 配置文件的 `provider_credential_schema` 或 `model_credential_schema` 定义,传入如:`api_key` 等。
361
+
362
+ - `file` (File) 文件流
363
+
364
+ - `user` (string) [optional] 用户的唯一标识符
365
+
366
+ 可以帮助供应商监控和检测滥用行为。
367
+
368
+ - 返回:
369
+
370
+ 语音转换后的字符串。
371
+
372
+ ### Text2speech
373
+
374
+ 继承 `__base.text2speech_model.Text2SpeechModel` 基类,实现以下接口:
375
+
376
+ - Invoke 调用
377
+
378
+ ```python
379
+ def _invoke(self, model: str, credentials: dict, content_text: str, streaming: bool, user: Optional[str] = None):
380
+ """
381
+ Invoke text-to-speech model
382
+
383
+ :param model: model name
384
+ :param credentials: model credentials
385
+ :param content_text: text content to be converted
386
+ :param streaming: output is streaming
387
+ :param user: unique user id
388
+ :return: generated audio file
389
+ """
390
+ ```
391
+
392
+ - 参数:
393
+
394
+ - `model` (string) 模型名称
395
+
396
+ - `credentials` (object) 凭据信息
397
+
398
+ 凭据信息的参数由供应商 YAML 配置文件的 `provider_credential_schema` 或 `model_credential_schema` 定义,传入如:`api_key` 等。
399
+
400
+ - `content_text` (string) 需要转换的文本内容
401
+
402
+ - `streaming` (bool) 是否进行流式输出
403
+
404
+ - `user` (string) [optional] 用户的唯一标识符
405
+
406
+ 可以帮助供应商监控和检测滥用行为。
407
+
408
+ - 返回:
409
+
410
+ 文本转换后的语音流。
411
+
412
+ ### Moderation
413
+
414
+ 继承 `__base.moderation_model.ModerationModel` 基类,实现以下接口:
415
+
416
+ - Invoke 调用
417
+
418
+ ```python
419
+ def _invoke(self, model: str, credentials: dict,
420
+ text: str, user: Optional[str] = None) \
421
+ -> bool:
422
+ """
423
+ Invoke moderation model
424
+
425
+ :param model: model name
426
+ :param credentials: model credentials
427
+ :param text: text to moderate
428
+ :param user: unique user id
429
+ :return: false if text is safe, true otherwise
430
+ """
431
+ ```
432
+
433
+ - 参数:
434
+
435
+ - `model` (string) 模型名称
436
+
437
+ - `credentials` (object) 凭据信息
438
+
439
+ 凭据信息的参数由供应商 YAML 配置文件的 `provider_credential_schema` 或 `model_credential_schema` 定义,传入如:`api_key` 等。
440
+
441
+ - `text` (string) 文本内容
442
+
443
+ - `user` (string) [optional] 用户的唯一标识符
444
+
445
+ 可以帮助供应商监控和检测滥用行为。
446
+
447
+ - 返回:
448
+
449
+ False 代表传入的文本安全,True 则反之。
450
+
451
+
452
+
453
+ ## 实体
454
+
455
+ ### PromptMessageRole
456
+
457
+ 消息角色
458
+
459
+ ```python
460
+ class PromptMessageRole(Enum):
461
+ """
462
+ Enum class for prompt message.
463
+ """
464
+ SYSTEM = "system"
465
+ USER = "user"
466
+ ASSISTANT = "assistant"
467
+ TOOL = "tool"
468
+ ```
469
+
470
+ ### PromptMessageContentType
471
+
472
+ 消息内容类型,分为纯文本和图片。
473
+
474
+ ```python
475
+ class PromptMessageContentType(Enum):
476
+ """
477
+ Enum class for prompt message content type.
478
+ """
479
+ TEXT = 'text'
480
+ IMAGE = 'image'
481
+ ```
482
+
483
+ ### PromptMessageContent
484
+
485
+ 消息内容基类,仅作为参数声明用,不可初始化。
486
+
487
+ ```python
488
+ class PromptMessageContent(BaseModel):
489
+ """
490
+ Model class for prompt message content.
491
+ """
492
+ type: PromptMessageContentType
493
+ data: str # 内容数据
494
+ ```
495
+
496
+ 当前支持文本和图片两种类型,可支持同时传入文本和多图。
497
+
498
+ 需要分别初始化 `TextPromptMessageContent` 和 `ImagePromptMessageContent` 传入。
499
+
500
+ ### TextPromptMessageContent
501
+
502
+ ```python
503
+ class TextPromptMessageContent(PromptMessageContent):
504
+ """
505
+ Model class for text prompt message content.
506
+ """
507
+ type: PromptMessageContentType = PromptMessageContentType.TEXT
508
+ ```
509
+
510
+ 若传入图文,其中文字需要构造此实体作为 `content` 列表中的一部分。
511
+
512
+ ### ImagePromptMessageContent
513
+
514
+ ```python
515
+ class ImagePromptMessageContent(PromptMessageContent):
516
+ """
517
+ Model class for image prompt message content.
518
+ """
519
+ class DETAIL(Enum):
520
+ LOW = 'low'
521
+ HIGH = 'high'
522
+
523
+ type: PromptMessageContentType = PromptMessageContentType.IMAGE
524
+ detail: DETAIL = DETAIL.LOW # 分辨率
525
+ ```
526
+
527
+ 若传入图文,其中图片需要构造此实体作为 `content` 列表中的一部分。
+
+ `data` 可以为 `url` 或者图片 `base64` 编码后的字符串。
530
+
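+ 例如,同时传入一段文字和一张图片时,可按如下方式构造 `content`(示意,`UserPromptMessage` 见下文):
+
+ ```python
+ content = [
+     TextPromptMessageContent(data="这张图片里有什么?"),
+     ImagePromptMessageContent(
+         data="https://example.com/image.png",  # 假想的图片 URL,也可为 base64 编码后的字符串
+         detail=ImagePromptMessageContent.DETAIL.LOW,
+     ),
+ ]
+ message = UserPromptMessage(content=content)
+ ```
+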
531
+ ### PromptMessage
532
+
533
+ 所有 Role 消息体的基类,仅作为参数声明用,不可初始化。
534
+
535
+ ```python
536
+ class PromptMessage(ABC, BaseModel):
537
+ """
538
+ Model class for prompt message.
539
+ """
540
+ role: PromptMessageRole # 消息角色
541
+ content: Optional[str | list[PromptMessageContent]] = None # 支持两种类型,字符串和内容列表,内容列表是为了满足多模态的需要,可详见 PromptMessageContent 说明。
542
+ name: Optional[str] = None # 名称,可选。
543
+ ```
544
+
545
+ ### UserPromptMessage
546
+
547
+ UserMessage 消息体,代表用户消息。
548
+
549
+ ```python
550
+ class UserPromptMessage(PromptMessage):
551
+ """
552
+ Model class for user prompt message.
553
+ """
554
+ role: PromptMessageRole = PromptMessageRole.USER
555
+ ```
556
+
557
+ ### AssistantPromptMessage
558
+
559
+ 代表模型返回消息,通常用于 `few-shots` 或聊天历史传入。
560
+
561
+ ```python
562
+ class AssistantPromptMessage(PromptMessage):
563
+ """
564
+ Model class for assistant prompt message.
565
+ """
566
+ class ToolCall(BaseModel):
567
+ """
568
+ Model class for assistant prompt message tool call.
569
+ """
570
+ class ToolCallFunction(BaseModel):
571
+ """
572
+ Model class for assistant prompt message tool call function.
573
+ """
574
+ name: str # 工具名称
575
+ arguments: str # 工具参数
576
+
577
+ id: str # 工具 ID,仅在 OpenAI tool call 生效,为工具调用的唯一 ID,同一个工具可以调用多次
578
+ type: str # 默认 function
579
+ function: ToolCallFunction # 工具调用信息
580
+
581
+ role: PromptMessageRole = PromptMessageRole.ASSISTANT
582
+ tool_calls: list[ToolCall] = [] # 模型回复的工具调用结果(仅当传入 tools,并且模型认为需要调用工具时返回)
583
+ ```
584
+
585
+ 其中 `tool_calls` 为调用模型传入 `tools` 后,由模型返回的 `tool call` 列表。
586
+
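+ 例如,模型返回一次名为 `get_weather` 的工具调用时,对应的消息体大致如下(示意,调用 ID 为假想值):
+
+ ```python
+ assistant_message = AssistantPromptMessage(
+     content="",
+     tool_calls=[
+         AssistantPromptMessage.ToolCall(
+             id="call_abc123",  # 假想的调用 ID
+             type="function",
+             function=AssistantPromptMessage.ToolCall.ToolCallFunction(
+                 name="get_weather",
+                 arguments='{"city": "Beijing"}',
+             ),
+         )
+     ],
+ )
+ ```
+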
587
+ ### SystemPromptMessage
588
+
589
+ 代表系统消息,通常用于设定给模型的系统指令。
590
+
591
+ ```python
592
+ class SystemPromptMessage(PromptMessage):
593
+ """
594
+ Model class for system prompt message.
595
+ """
596
+ role: PromptMessageRole = PromptMessageRole.SYSTEM
597
+ ```
598
+
599
+ ### ToolPromptMessage
600
+
601
+ 代表工具消息,用于工具执行后将结果交给模型进行下一步计划。
602
+
603
+ ```python
604
+ class ToolPromptMessage(PromptMessage):
605
+ """
606
+ Model class for tool prompt message.
607
+ """
608
+ role: PromptMessageRole = PromptMessageRole.TOOL
609
+ tool_call_id: str # 工具调用 ID,若不支持 OpenAI tool call,也可传入工具名称
610
+ ```
611
+
612
+ 基类的 `content` 传入工具执行结果。
613
+
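+ 例如,工具执行完成后,可构造如下消息将结果回传给模型(示意,`tool_call_id` 对应上文模型返回的调用 ID):
+
+ ```python
+ tool_message = ToolPromptMessage(
+     content='{"temperature": "23°C"}',  # 工具执行结果
+     tool_call_id="call_abc123",
+ )
+ ```
+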
614
+ ### PromptMessageTool
615
+
616
+ ```python
617
+ class PromptMessageTool(BaseModel):
618
+ """
619
+ Model class for prompt message tool.
620
+ """
621
+ name: str # 工具名称
622
+ description: str # 工具描述
623
+ parameters: dict # 工具参数 dict
624
+ ```
625
+
626
+ ---
627
+
628
+ ### LLMResult
629
+
630
+ ```python
631
+ class LLMResult(BaseModel):
632
+ """
633
+ Model class for llm result.
634
+ """
635
+ model: str # 实际使用模型
636
+ prompt_messages: list[PromptMessage] # prompt 消息列表
637
+ message: AssistantPromptMessage # 回复消息
638
+ usage: LLMUsage # 使用的 tokens 及费用信息
639
+ system_fingerprint: Optional[str] = None # 请求指纹,可参考 OpenAI 该参数定义
640
+ ```
641
+
642
+ ### LLMResultChunkDelta
643
+
644
+ 流式返回中每个迭代内部 `delta` 实体
645
+
646
+ ```python
647
+ class LLMResultChunkDelta(BaseModel):
648
+ """
649
+ Model class for llm result chunk delta.
650
+ """
651
+ index: int # 序号
652
+ message: AssistantPromptMessage # 回复消息
653
+ usage: Optional[LLMUsage] = None # 使用的 tokens 及费用信息,仅最后一条返回
654
+ finish_reason: Optional[str] = None # 结束原因,仅最后一条返回
655
+ ```
656
+
657
+ ### LLMResultChunk
658
+
659
+ 流式返回中每个迭代实体
660
+
661
+ ```python
662
+ class LLMResultChunk(BaseModel):
663
+ """
664
+ Model class for llm result chunk.
665
+ """
666
+ model: str # 实际使用模型
667
+ prompt_messages: list[PromptMessage] # prompt 消息列表
668
+ system_fingerprint: Optional[str] = None # 请求指纹,可参考 OpenAI 该参数定义
669
+ delta: LLMResultChunkDelta # 每个迭代存在变化的内容
670
+ ```
671
+
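+ 流式实现时,通常在生成器中逐个产出 chunk,并在最后一条附带 `usage` 与 `finish_reason`,示意如下(`upstream_chunks` 为假想的上游流):
+
+ ```python
+ def _handle_stream_response(self, model: str, prompt_messages: list) -> Generator:
+     for index, piece in enumerate(upstream_chunks):
+         yield LLMResultChunk(
+             model=model,
+             prompt_messages=prompt_messages,
+             delta=LLMResultChunkDelta(
+                 index=index,
+                 message=AssistantPromptMessage(content=piece),
+             ),
+         )
+ ```
+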
672
+ ### LLMUsage
673
+
674
+ ```python
675
+ class LLMUsage(ModelUsage):
676
+ """
677
+ Model class for llm usage.
678
+ """
679
+ prompt_tokens: int # prompt 使用 tokens
680
+ prompt_unit_price: Decimal # prompt 单价
681
+ prompt_price_unit: Decimal # prompt 价格单位,即单价基于多少 tokens
682
+ prompt_price: Decimal # prompt 费用
683
+ completion_tokens: int # 回复使用 tokens
684
+ completion_unit_price: Decimal # 回复单价
685
+ completion_price_unit: Decimal # 回复价格单位,即单价基于多少 tokens
686
+ completion_price: Decimal # 回复费用
687
+ total_tokens: int # 总使用 token 数
688
+ total_price: Decimal # 总费用
689
+ currency: str # 货币单位
690
+ latency: float # 请求耗时(s)
691
+ ```
692
+
693
+ ---
694
+
695
+ ### TextEmbeddingResult
696
+
697
+ ```python
698
+ class TextEmbeddingResult(BaseModel):
699
+ """
700
+ Model class for text embedding result.
701
+ """
702
+ model: str # 实际使用模型
703
+ embeddings: list[list[float]] # embedding 向量列表,对应传入的 texts 列表
704
+ usage: EmbeddingUsage # 使用信息
705
+ ```
706
+
707
+ ### EmbeddingUsage
708
+
709
+ ```python
710
+ class EmbeddingUsage(ModelUsage):
711
+ """
712
+ Model class for embedding usage.
713
+ """
714
+ tokens: int # 使用 token 数
715
+ total_tokens: int # 总使用 token 数
716
+ unit_price: Decimal # 单价
717
+ price_unit: Decimal # 价格单位,即单价基于多少 tokens
718
+ total_price: Decimal # 总费用
719
+ currency: str # 货币单位
720
+ latency: float # 请求耗时(s)
721
+ ```
722
+
723
+ ---
724
+
725
+ ### RerankResult
726
+
727
+ ```python
728
+ class RerankResult(BaseModel):
729
+ """
730
+ Model class for rerank result.
731
+ """
732
+ model: str # 实际使用模型
733
+ docs: list[RerankDocument] # 重排后的分段列表
734
+ ```
735
+
736
+ ### RerankDocument
737
+
738
+ ```python
739
+ class RerankDocument(BaseModel):
740
+ """
741
+ Model class for rerank document.
742
+ """
743
+ index: int # 原序号
744
+ text: str # 分段文本内容
745
+ score: float # 分数
746
+ ```
api/core/model_runtime/docs/zh_Hans/predefined_model_scale_out.md ADDED
@@ -0,0 +1,172 @@
1
+ ## 预定义模型接入
2
+
3
+ 供应商集成完成后,接下来为供应商下模型的接入。
4
+
5
+ 我们首先需要确定接入模型的类型,并在对应供应商的目录下创建对应模型类型的 `module`。
6
+
7
+ 当前支持模型类型如下:
8
+
9
+ - `llm` 文本生成模型
10
+ - `text_embedding` 文本 Embedding 模型
11
+ - `rerank` Rerank 模型
12
+ - `speech2text` 语音转文字
13
+ - `tts` 文字转语音
14
+ - `moderation` 审查
15
+
16
+ 依旧以 `Anthropic` 为例,`Anthropic` 仅支持 LLM,因此需要在 `model_providers.anthropic` 下创建一个名为 `llm` 的 `module`。
17
+
18
+ 对于预定义的模型,我们首先需要在 `llm` `module` 下创建以模型名为文件名称的 YAML 文件,如:`claude-2.1.yaml`。
19
+
20
+ ### 准备模型 YAML
21
+
22
+ ```yaml
23
+ model: claude-2.1 # 模型标识
24
+ # 模型展示名称,可设置 en_US 英文、zh_Hans 中文两种语言,zh_Hans 不设置将默认使用 en_US。
25
+ # 也可不设置 label,则使用 model 标识内容。
26
+ label:
27
+ en_US: claude-2.1
28
+ model_type: llm # 模型类型,claude-2.1 为 LLM
29
+ features: # 支持功能,agent-thought 为支持 Agent 推理,vision 为支持图片理解
30
+ - agent-thought
31
+ model_properties: # 模型属性
32
+ mode: chat # LLM 模式,complete 文本补全模型,chat 对话模型
33
+ context_size: 200000 # 支持最大上下文大小
34
+ parameter_rules: # 模型调用参数规则,仅 LLM 需要提供
35
+ - name: temperature # 调用参数变量名
36
+ # 默认预置了 5 种变量内容配置模板,temperature/top_p/max_tokens/presence_penalty/frequency_penalty
37
+ # 可在 use_template 中直接设置模板变量名,将会使用 entities.defaults.PARAMETER_RULE_TEMPLATE 中的默认配置
38
+ # 若设置了额外的配置参数,将覆盖默认配置
39
+ use_template: temperature
40
+ - name: top_p
41
+ use_template: top_p
42
+ - name: top_k
43
+ label: # 调用参数展示名称
44
+ zh_Hans: 取样数量
45
+ en_US: Top k
46
+ type: int # 参数类型,支持 float/int/string/boolean
47
+ help: # 帮助信息,描述参数作用
48
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
49
+ en_US: Only sample from the top K options for each subsequent token.
50
+ required: false # 是否必填,可不设置
51
+ - name: max_tokens_to_sample
52
+ use_template: max_tokens
53
+ default: 4096 # 参数默认值
54
+ min: 1 # 参数最小值,仅 float/int 可用
55
+ max: 4096 # 参数最大值,仅 float/int 可用
56
+ pricing: # 价格信息
57
+ input: '8.00' # 输入单价,即 Prompt 单价
58
+ output: '24.00' # 输出单价,即返回内容单价
59
+ unit: '0.000001' # 价格单位,即上述价格为每 1M tokens 的单价
60
+ currency: USD # 价格货币
61
+ ```
62
+
63
+ 建议将所有模型配置都准备完毕后再开始模型代码的实现。
64
+
65
+ 同样,也可以参考 `model_providers` 目录下其他供应商对应模型类型目录下的 YAML 配置信息,完整的 YAML 规则见:[Schema](schema.md#aimodelentity)。
66
+
67
+ ### 实现模型调用代码
68
+
69
+ 接下来需要在 `llm` `module` 下创建一个同名的 python 文件 `llm.py` 来编写代码实现。
70
+
71
+ 在 `llm.py` 中创建一个 Anthropic LLM 类,我们取名为 `AnthropicLargeLanguageModel`(随意),继承 `__base.large_language_model.LargeLanguageModel` 基类,实现以下几个方法:
72
+
73
+ - LLM 调用
74
+
75
+ 实现 LLM 调用的核心方法,可同时支持流式和同步返回。
76
+
77
+ ```python
78
+ def _invoke(self, model: str, credentials: dict,
79
+ prompt_messages: list[PromptMessage], model_parameters: dict,
80
+ tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
81
+ stream: bool = True, user: Optional[str] = None) \
82
+ -> Union[LLMResult, Generator]:
83
+ """
84
+ Invoke large language model
85
+
86
+ :param model: model name
87
+ :param credentials: model credentials
88
+ :param prompt_messages: prompt messages
89
+ :param model_parameters: model parameters
90
+ :param tools: tools for tool calling
91
+ :param stop: stop words
92
+ :param stream: is stream response
93
+ :param user: unique user id
94
+ :return: full response or stream response chunk generator result
95
+ """
96
+ ```
97
+
98
+ 在实现时,需要注意使用两个函数来分别处理同步返回和流式返回。因为 Python 会将包含 `yield` 关键字的函数识别为生成器函数,其返回的数据类型固定为 `Generator`,所以同步和流式返回需要分别实现,就像下面这样(注意下面例子使用了简化参数,实际实现时需要按照上面的参数列表进行实现):
99
+
100
+ ```python
101
+ def _invoke(self, stream: bool, **kwargs) \
102
+ -> Union[LLMResult, Generator]:
103
+ if stream:
104
+ return self._handle_stream_response(**kwargs)
105
+ return self._handle_sync_response(**kwargs)
106
+
107
+ def _handle_stream_response(self, **kwargs) -> Generator:
108
+ for chunk in response:
109
+ yield chunk
110
+ def _handle_sync_response(self, **kwargs) -> LLMResult:
111
+ return LLMResult(**response)
112
+ ```
113
+
114
+ - 预计算输入 tokens
115
+
116
+ 若模型未提供预计算 tokens 接口,可直接返回 0。
117
+
118
+ ```python
119
+ def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
120
+ tools: Optional[list[PromptMessageTool]] = None) -> int:
121
+ """
122
+ Get number of tokens for given prompt messages
123
+
124
+ :param model: model name
125
+ :param credentials: model credentials
126
+ :param prompt_messages: prompt messages
127
+ :param tools: tools for tool calling
128
+ :return:
129
+ """
130
+ ```
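+
+ 一个粗略估算的实现示意(若供应商未提供 tokenizer,也可直接返回 0):
+
+ ```python
+ def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
+                    tools: Optional[list[PromptMessageTool]] = None) -> int:
+     # 将消息内容拼接后粗略估算;假设平均每 2 个字符约 1 个 token,仅为示意
+     text = "".join(str(message.content) for message in prompt_messages)
+     return len(text) // 2
+ ```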
131
+
132
+ - 模型凭据校验
133
+
134
+ 与供应商凭据校验类似,这里针对单个模型进行校验。
135
+
136
+ ```python
137
+ def validate_credentials(self, model: str, credentials: dict) -> None:
138
+ """
139
+ Validate model credentials
140
+
141
+ :param model: model name
142
+ :param credentials: model credentials
143
+ :return:
144
+ """
145
+ ```
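+
+ 常见做法是以最小代价真实调用一次模型,失败时抛出 `CredentialsValidateFailedError`,示意如下(`max_tokens_to_sample` 对应上文 YAML 中定义的参数):
+
+ ```python
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+     try:
+         # 以最小参数发起一次真实调用来校验凭据
+         self._invoke(
+             model=model,
+             credentials=credentials,
+             prompt_messages=[UserPromptMessage(content="ping")],
+             model_parameters={"max_tokens_to_sample": 5},
+             stream=False,
+         )
+     except Exception as ex:
+         raise CredentialsValidateFailedError(str(ex))
+ ```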
146
+
147
+ - 调用异常错误映射表
148
+
149
+ 当模型调用异常时需要映射到 Runtime 指定的 `InvokeError` 类型,方便 Dify 针对不同错误做不同后续处理。
150
+
151
+ Runtime Errors:
152
+
153
+ - `InvokeConnectionError` 调用连接错误
+ - `InvokeServerUnavailableError` 调用服务方不可用
+ - `InvokeRateLimitError` 调用达到限额
+ - `InvokeAuthorizationError` 调用鉴权失败
+ - `InvokeBadRequestError` 调用传参有误
158
+
159
+ ```python
160
+ @property
161
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
162
+ """
163
+ Map model invoke error to unified error
164
+ The key is the error type thrown to the caller
165
+ The value is the error type thrown by the model,
166
+ which needs to be converted into a unified error type for the caller.
167
+
168
+ :return: Invoke error mapping
169
+ """
170
+ ```
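+
+ 以假想的 `anthropic` SDK 异常为例,映射表大致如下(具体异常类型以实际使用的 SDK 版本为准):
+
+ ```python
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+     return {
+         InvokeConnectionError: [anthropic.APIConnectionError],
+         InvokeServerUnavailableError: [anthropic.InternalServerError],
+         InvokeRateLimitError: [anthropic.RateLimitError],
+         InvokeAuthorizationError: [anthropic.AuthenticationError],
+         InvokeBadRequestError: [anthropic.BadRequestError],
+     }
+ ```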
171
+
172
+ 接口方法说明见:[Interfaces](./interfaces.md),具体实现可参考:[llm.py](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/llm/llm.py)。
api/core/model_runtime/docs/zh_Hans/provider_scale_out.md ADDED
@@ -0,0 +1,188 @@
1
+ ## 增加新供应商
2
+
3
+ 供应商支持三种模型配置方式:
4
+
5
+ - `predefined-model` 预定义模型
6
+
7
+ 表示用户只需要配置统一的供应商凭据即可使用供应商下的预定义模型。
8
+
9
+ - `customizable-model` 自定义模型
10
+
11
+ 用户需要新增每个模型的凭据配置,如Xinference,它同时支持 LLM 和 Text Embedding,但是每个模型都有唯一的**model_uid**,如果想要将两者同时接入,就需要为每个模型配置一个**model_uid**。
12
+
13
+ - `fetch-from-remote` 从远程获取
14
+
15
+ 与 `predefined-model` 配置方式一致,只需要配置统一的供应商凭据即可,模型通过凭据信息从供应商获取。
16
+
17
+ 如 OpenAI,我们可以基于 gpt-3.5-turbo 来 Fine-tune 多个模型,而它们都位于同一个**api_key**下,当配置为 `fetch-from-remote` 时,开发者只需要配置统一的**api_key**即可让 Dify Runtime 获取到开发者所有的微调模型并接入 Dify。
18
+
19
+ 这三种配置方式**支持共存**,即存在供应商支持 `predefined-model` + `customizable-model` 或 `predefined-model` + `fetch-from-remote` 等,也就是配置了供应商统一凭据可以使用预定义模型和从远程获取的模型,若新增了模型,则可以在此基础上额外使用自定义的模型。
20
+
21
+ ## 开始
22
+
23
+ ### 介绍
24
+
25
+ #### 名词解释
26
+ - `module`: 一个`module`即为一个Python Package,或者通俗一点,称为一个文件夹,里面包含了一个`__init__.py`文件,以及其他的`.py`文件。
27
+
28
+ #### 步骤
29
+ 新增一个供应商主要分为几步,这里简单列出,帮助大家有一个大概的认识,具体的步骤会在下面详细介绍。
30
+
31
+ - 创建供应商yaml文件,根据[ProviderSchema](./schema.md#provider)编写
32
+ - 创建供应商代码,实现一个`class`。
33
+ - 根据模型类型,在供应商`module`下创建对应的模型类型 `module`,如`llm`或`text_embedding`。
34
+ - 根据模型类型,在对应的模型`module`下创建同名的代码文件,如`llm.py`,并实现一个`class`。
35
+ - 如果有预定义模型,根据模型名称创建同名的yaml文件在模型`module`下,如`claude-2.1.yaml`,根据[AIModelEntity](./schema.md#aimodelentity)编写。
36
+ - 编写测试代码,确保功能可用。
37
+
38
+ ### 开始吧
39
+
40
+ 增加一个新的供应商需要先确定供应商的英文标识,如 `anthropic`,使用该标识在 `model_providers` 创建以此为名称的 `module`。
41
+
42
+ 在此 `module` 下,我们需要先准备供应商的 YAML 配置。
43
+
44
+ #### 准备供应商 YAML
45
+
46
+ 此处以 `Anthropic` 为例,预设了供应商基础信息、支持的模型类型、配置方式、凭据规则。
47
+
48
+ ```YAML
49
+ provider: anthropic # 供应商标识
50
+ label: # 供应商展示名称,可设置 en_US 英文、zh_Hans 中文两种语言,zh_Hans 不设置将默认使用 en_US。
51
+ en_US: Anthropic
52
+ icon_small: # 供应商小图标,存储在对应供应商实现目录下的 _assets 目录,中英文策略同 label
53
+ en_US: icon_s_en.png
54
+ icon_large: # 供应商大图标,存储在对应供应商实现目录下的 _assets 目录,中英文策略同 label
55
+ en_US: icon_l_en.png
56
+ supported_model_types: # 支持的模型类型,Anthropic 仅支持 LLM
57
+ - llm
58
+ configurate_methods: # 支持的配置方式,Anthropic 仅支持预定义模型
59
+ - predefined-model
60
+ provider_credential_schema: # 供应商凭据规则,由于 Anthropic 仅支持预定义模型,则需要定义统一供应商凭据规则
61
+ credential_form_schemas: # 凭据表单项列表
62
+ - variable: anthropic_api_key # 凭据参数变量名
63
+ label: # 展示名称
64
+ en_US: API Key
65
+ type: secret-input # 表单类型,此处 secret-input 代表加密信息输入框,编辑时只展示屏蔽后的信息。
66
+ required: true # 是否必填
67
+ placeholder: # PlaceHolder 信息
68
+ zh_Hans: 在此输入您的 API Key
69
+ en_US: Enter your API Key
70
+ - variable: anthropic_api_url
71
+ label:
72
+ en_US: API URL
73
+ type: text-input # 表单类型,此处 text-input 代表文本输入框
74
+ required: false
75
+ placeholder:
76
+ zh_Hans: 在此输入您的 API URL
77
+ en_US: Enter your API URL
78
+ ```
79
+
80
+ 如果接入的供应商提供自定义模型,比如`OpenAI`提供微调模型,那么我们就需要添加[`model_credential_schema`](./schema.md#modelcredentialschema),以`OpenAI`为例:
81
+
82
+ ```yaml
83
+ model_credential_schema:
84
+ model: # 微调模型名称
85
+ label:
86
+ en_US: Model Name
87
+ zh_Hans: 模型名称
88
+ placeholder:
89
+ en_US: Enter your model name
90
+ zh_Hans: 输入模型名称
91
+ credential_form_schemas:
92
+ - variable: openai_api_key
93
+ label:
94
+ en_US: API Key
95
+ type: secret-input
96
+ required: true
97
+ placeholder:
98
+ zh_Hans: 在此输入您的 API Key
99
+ en_US: Enter your API Key
100
+ - variable: openai_organization
101
+ label:
102
+ zh_Hans: 组织 ID
103
+ en_US: Organization
104
+ type: text-input
105
+ required: false
106
+ placeholder:
107
+ zh_Hans: 在此输入您的组织 ID
108
+ en_US: Enter your Organization ID
109
+ - variable: openai_api_base
110
+ label:
111
+ zh_Hans: API Base
112
+ en_US: API Base
113
+ type: text-input
114
+ required: false
115
+ placeholder:
116
+ zh_Hans: 在此输入您的 API Base
117
+ en_US: Enter your API Base
118
+ ```
119
+
120
+ 也可以参考 `model_providers` 目录下其他供应商目录下的 YAML 配置信息,完整的 YAML 规则见:[Schema](schema.md#provider)。
121
+
122
+ #### 实现供应商代码
123
+
124
+ 我们需要在`model_providers`下创建一个同名的python文件,如`anthropic.py`,并实现一个`class`,继承`__base.provider.Provider`基类,如`AnthropicProvider`。
125
+
126
+ ##### 自定义模型供应商
127
+
128
+ 当供应商为 Xinference 等自定义模型供应商时,可跳过该步骤,仅创建一个空的`XinferenceProvider`类,并实现一个空的`validate_provider_credentials`方法即可。该方法并不会被实际调用,仅用于避免抽象基类无法实例化。
129
+
130
+ ```python
131
+ class XinferenceProvider(Provider):
132
+ def validate_provider_credentials(self, credentials: dict) -> None:
133
+ pass
134
+ ```
135
+
136
+ ##### 预定义模型供应商
137
+
138
+ 供应商需要继承 `__base.model_provider.ModelProvider` 基类,实现 `validate_provider_credentials` 供应商统一凭据校验方法即可,可参考 [AnthropicProvider](https://github.com/langgenius/dify-runtime/blob/main/lib/model_providers/anthropic/anthropic.py)。
139
+
140
+ ```python
141
+ def validate_provider_credentials(self, credentials: dict) -> None:
142
+ """
143
+ Validate provider credentials
144
+ You can choose any validate_credentials method of model type or implement validate method by yourself,
145
+ such as: get model list api
146
+
147
+ if validate failed, raise exception
148
+
149
+ :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
150
+ """
151
+ ```
152
+
153
+ 当然也可以先预留 `validate_provider_credentials` 实现,在模型凭据校验方法实现后直接复用。
154
+
155
+ #### 增加模型
156
+
157
+ #### [增加预定义模型 👈🏻](./predefined_model_scale_out.md)
158
+ 对于预定义模型,只需定义一个 YAML 文件并实现调用代码即可接入。
159
+
160
+ #### [增加自定义模型 👈🏻](./customizable_model_scale_out.md)
161
+ 对于自定义模型,我们只需要实现调用代码即可接入,但是它需要处理的参数可能会更加复杂。
162
+
163
+ ---
164
+
165
+ ### 测试
166
+
167
+ 为了保证接入供应商/模型的可用性,编写后的每个方法均需要在 `tests` 目录中编写对应的集成测试代码。
168
+
169
+ 依旧以 `Anthropic` 为例。
170
+
171
+ 在编写测试代码前,需要先在 `.env.example` 新增测试供应商所需要的凭据环境变量,如:`ANTHROPIC_API_KEY`。
172
+
173
+ 执行测试前,需要先将 `.env.example` 复制为 `.env`。
174
+
175
+ #### 编写测试代码
176
+
177
+ 在 `tests` 目录下创建供应商同名的 `module`: `anthropic`,继续在此模块中创建 `test_provider.py` 以及对应模型类型的 test py 文件,如下所示:
178
+
179
+ ```shell
180
+ .
181
+ ├── __init__.py
182
+ ├── anthropic
183
+ │   ├── __init__.py
184
+ │   ├── test_llm.py # LLM 测试
185
+ │   └── test_provider.py # 供应商测试
186
+ ```
187
+
188
+ 针对上面实现的代码的各种情况进行测试代码编写,并测试通过后提交代码。
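+
+ `test_provider.py` 的一个最小示例大致如下(示意,导入路径以实际工程结构为准,凭据从环境变量读取):
+
+ ```python
+ import os
+
+ import pytest
+
+ from core.model_runtime.errors.validate import CredentialsValidateFailedError
+ from core.model_runtime.model_providers.anthropic.anthropic import AnthropicProvider
+
+
+ def test_validate_provider_credentials():
+     provider = AnthropicProvider()
+
+     # 错误凭据应当抛出校验失败异常
+     with pytest.raises(CredentialsValidateFailedError):
+         provider.validate_provider_credentials(credentials={"anthropic_api_key": "invalid"})
+
+     # 正确凭据应当通过校验
+     provider.validate_provider_credentials(
+         credentials={"anthropic_api_key": os.environ.get("ANTHROPIC_API_KEY")}
+     )
+ ```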
api/core/model_runtime/docs/zh_Hans/schema.md ADDED
@@ -0,0 +1,208 @@
1
+ # 配置规则
2
+
3
+ - 供应商规则基于 [Provider](#Provider) 实体。
4
+
5
+ - 模型规则基于 [AIModelEntity](#AIModelEntity) 实体。
6
+
7
+ > 以下所有实体均基于 `Pydantic BaseModel`,可在 `entities` 模块中找到对应实体。
8
+
9
+ ### Provider
10
+
11
+ - `provider` (string) 供应商标识,如:`openai`
12
+ - `label` (object) 供应商展示名称,i18n,可设置 `en_US` 英文、`zh_Hans` 中文两种语言
13
+ - `zh_Hans` (string) [optional] 中文标签名,`zh_Hans` 不设置将默认使用 `en_US`。
14
+ - `en_US` (string) 英文标签名
15
+ - `description` (object) [optional] 供应商描述,i18n
16
+ - `zh_Hans` (string) [optional] 中文描述
17
+ - `en_US` (string) 英文描述
18
+ - `icon_small` (string) [optional] 供应商小 ICON,存储在对应供应商实现目录下的 `_assets` 目录,中英文策略同 `label`
19
+ - `zh_Hans` (string) [optional] 中文 ICON
20
+ - `en_US` (string) 英文 ICON
21
+ - `icon_large` (string) [optional] 供应商大 ICON,存储在对应供应商实现目录下的 _assets 目录,中英文策略同 label
22
+ - `zh_Hans` (string) [optional] 中文 ICON
23
+ - `en_US` (string) 英文 ICON
24
+ - `background` (string) [optional] 背景颜色色值,例:#FFFFFF,为空则展示前端默认色值。
25
+ - `help` (object) [optional] 帮助信息
26
+ - `title` (object) 帮助标题,i18n
27
+ - `zh_Hans` (string) [optional] 中文标题
28
+ - `en_US` (string) 英文标题
29
+ - `url` (object) 帮助链接,i18n
30
+ - `zh_Hans` (string) [optional] 中文链接
31
+ - `en_US` (string) 英文链接
32
+ - `supported_model_types` (array[[ModelType](#ModelType)]) 支持的模型类型
33
+ - `configurate_methods` (array[[ConfigurateMethod](#ConfigurateMethod)]) 配置方式
34
+ - `provider_credential_schema` ([ProviderCredentialSchema](#ProviderCredentialSchema)) 供应商凭据规格
35
+ - `model_credential_schema` ([ModelCredentialSchema](#ModelCredentialSchema)) 模型凭据规格
36
+
37
+ ### AIModelEntity
38
+
39
+ - `model` (string) 模型标识,如:`gpt-3.5-turbo`
40
+ - `label` (object) [optional] 模型展示名称,i18n,可设置 `en_US` 英文、`zh_Hans` 中文两种语言
41
+ - `zh_Hans` (string) [optional] 中文标签名
42
+ - `en_US` (string) 英文标签名
43
+ - `model_type` ([ModelType](#ModelType)) 模型类型
44
+ - `features` (array[[ModelFeature](#ModelFeature)]) [optional] 支持功能列表
45
+ - `model_properties` (object) 模型属性
46
+ - `mode` ([LLMMode](#LLMMode)) 模式 (模型类型 `llm` 可用)
47
+ - `context_size` (int) 上下文大小 (模型类型 `llm` `text-embedding` 可用)
48
+ - `max_chunks` (int) 最大分块数量 (模型类型 `text-embedding` `moderation` 可用)
49
+ - `file_upload_limit` (int) 文件最大上传限制,单位:MB。(模型类型 `speech2text` 可用)
50
+ - `supported_file_extensions` (string) 支持文件扩展格式,如:mp3,mp4(模型类型 `speech2text` 可用)
51
+ - `default_voice` (string) 缺省音色,必选:alloy,echo,fable,onyx,nova,shimmer(模型类型 `tts` 可用)
52
+ - `voices` (list) 可选音色列表。
53
+ - `mode` (string) 音色模型。(模型类型 `tts` 可用)
54
+ - `name` (string) 音色模型显示名称。(模型类型 `tts` 可用)
55
+ - `language` (string) 音色模型支持语言。(模型类型 `tts` 可用)
56
+ - `word_limit` (int) 单次转换字数限制,默认按段落分段(模型类型 `tts` 可用)
57
+ - `audio_type` (string) 支持音频文件扩展格式,如:mp3,wav(模型类型 `tts` 可用)
58
+ - `max_workers` (int) 支持文字音频转换并发任务数(模型类型 `tts` 可用)
59
+ - `max_characters_per_chunk` (int) 每块最大字符数 (模型类型 `moderation` 可用)
60
+ - `parameter_rules` (array[[ParameterRule](#ParameterRule)]) [optional] 模型调用参数规则
61
+ - `pricing` ([PriceConfig](#PriceConfig)) [optional] 价格信息
62
+ - `deprecated` (bool) 是否废弃。若废弃,模型列表将不再展示,但已经配置的可以继续使用,默认 False。
63
+
64
+ ### ModelType
65
+
66
+ - `llm` 文本生成模型
67
+ - `text-embedding` 文本 Embedding 模型
68
+ - `rerank` Rerank 模型
69
+ - `speech2text` 语音转文字
70
+ - `tts` 文字转语音
71
+ - `moderation` 审查
72
+
73
+ ### ConfigurateMethod
74
+
75
+ - `predefined-model` 预定义模型
76
+
77
+ 表示用户只需要配置统一的供应商凭据即可使用供应商下的预定义模型。
78
+ - `customizable-model` 自定义模型
79
+
80
+ 用户需要新增每个模型的凭据配置。
81
+
82
+ - `fetch-from-remote` 从远程获取
83
+
84
+ 与 `predefined-model` 配置方式一致,只需要配置统一的供应商凭据即可,模型通过凭据信息从供应商获取。
85
+
86
+ ### ModelFeature
87
+
88
+ - `agent-thought` Agent 推理,一般参数量超过 70B 的模型具备思维链能力。
89
+ - `vision` 视觉,即:图像理解。
90
+ - `tool-call` 工具调用
91
+ - `multi-tool-call` 多工具调用
92
+ - `stream-tool-call` 流式工具调用
93
+
94
+ ### FetchFrom
95
+
96
+ - `predefined-model` 预定义模型
97
+ - `fetch-from-remote` 远程模型
98
+
99
+ ### LLMMode
100
+
101
+ - `completion` 文本补全
102
+ - `chat` 对话
103
+
104
+ ### ParameterRule
105
+
106
+ - `name` (string) 调用模型实际参数名
107
+
108
+ - `use_template` (string) [optional] 使用模板
109
+
110
+ 默认预置了 5 种变量内容配置模板:
111
+
112
+ - `temperature`
113
+ - `top_p`
114
+ - `frequency_penalty`
115
+ - `presence_penalty`
116
+ - `max_tokens`
117
+
118
+ 可在 `use_template` 中直接设置模板变量名,将会使用 `entities.defaults.PARAMETER_RULE_TEMPLATE` 中的默认配置,
+ 此时无需设置除 `name` 和 `use_template` 之外的其他参数;若设置了额外的配置参数,将覆盖默认配置。
120
+ 可参考 `openai/llm/gpt-3.5-turbo.yaml`。
121
+
122
+ - `label` (object) [optional] 标签,i18n
123
+
124
+ - `zh_Hans`(string) [optional] 中文标签名
125
+ - `en_US` (string) 英文标签名
126
+
127
+ - `type`(string) [optional] 参数类型
128
+
129
+ - `int` 整数
130
+ - `float` 浮点数
131
+ - `string` 字符串
132
+ - `boolean` 布尔型
133
+
134
+ - `help` (string) [optional] 帮助信息
135
+
136
+ - `zh_Hans` (string) [optional] 中文帮助信息
137
+ - `en_US` (string) 英文帮助信息
138
+
139
+ - `required` (bool) 是否必填,默认 False。
140
+
141
+ - `default`(int/float/string/bool) [optional] 默认值
142
+
143
+ - `min`(int/float) [optional] 最小值,仅数字类型适用
144
+
145
+ - `max`(int/float) [optional] 最大值,仅数字类型适用
146
+
147
+ - `precision`(int) [optional] 精度,保留小数位数,仅数字类型适用
148
+
149
+ - `options` (array[string]) [optional] 下拉选项值,仅当 `type` 为 `string` 时适用,若不设置或为 null 则不限制选项值
150
+
151
+ ### PriceConfig
152
+
153
+ - `input` (float) 输入单价,即 Prompt 单价
154
+ - `output` (float) 输出单价,即返回内容单价
155
+ - `unit` (float) 价格单位,如以 1M tokens 计价,则单价对应的单位 token 数为 `0.000001`
156
+ - `currency` (string) 货币单位
157
+
158
+ ### ProviderCredentialSchema
159
+
160
+ - `credential_form_schemas` (array[[CredentialFormSchema](#CredentialFormSchema)]) 凭据表单规范
161
+
162
+ ### ModelCredentialSchema
163
+
164
+ - `model` (object) 模型标识,变量名默认 `model`
165
+ - `label` (object) 模型表单项展示名称
166
+ - `en_US` (string) 英文
167
+ - `zh_Hans`(string) [optional] 中文
168
+ - `placeholder` (object) 模型提示内容
169
+ - `en_US`(string) 英文
170
+ - `zh_Hans`(string) [optional] 中文
171
+ - `credential_form_schemas` (array[[CredentialFormSchema](#CredentialFormSchema)]) 凭据表单规范
172
+
173
+ ### CredentialFormSchema
174
+
175
+ - `variable` (string) 表单项变量名
176
+ - `label` (object) 表单项标签名
177
+ - `en_US`(string) 英文
178
+ - `zh_Hans` (string) [optional] 中文
179
+ - `type` ([FormType](#FormType)) 表单项类型
180
+ - `required` (bool) 是否必填
181
+ - `default`(string) 默认值
182
+ - `options` (array[[FormOption](#FormOption)]) 表单项为 `select` 或 `radio` 专有属性,定义下拉内容
183
+ - `placeholder` (object) 表单项为 `text-input` 专有属性,表单项 PlaceHolder
184
+ - `en_US`(string) 英文
185
+ - `zh_Hans` (string) [optional] 中文
186
+ - `max_length` (int) 表单项为`text-input`专有属性,定义输入最大长度,0 为不限制。
187
+ - `show_on` (array[[FormShowOnObject](#FormShowOnObject)]) 当其他表单项值符合条件时显示,为空则始终显示。
188
+
189
+ ### FormType
190
+
191
+ - `text-input` 文本输入组件
192
+ - `secret-input` 密码输入组件
193
+ - `select` 单选下拉
194
+ - `radio` Radio 组件
195
+ - `switch` 开关组件,仅支持 `true` 和 `false`
196
+
197
+ ### FormOption
198
+
199
+ - `label` (object) 标签
200
+ - `en_US`(string) 英文
201
+ - `zh_Hans`(string) [optional] 中文
202
+ - `value` (string) 下拉选项值
203
+ - `show_on` (array[[FormShowOnObject](#FormShowOnObject)]) 当其他表单项值符合条件时显示,为空则始终显示。
204
+
205
+ ### FormShowOnObject
206
+
207
+ - `variable` (string) 其他表单项变量名
208
+ - `value` (string) 其他表单项变量值
api/core/model_runtime/entities/__init__.py ADDED
@@ -0,0 +1,45 @@
1
+ from .llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
2
+ from .message_entities import (
3
+ AssistantPromptMessage,
4
+ AudioPromptMessageContent,
5
+ DocumentPromptMessageContent,
6
+ ImagePromptMessageContent,
7
+ MultiModalPromptMessageContent,
8
+ PromptMessage,
9
+ PromptMessageContent,
10
+ PromptMessageContentType,
11
+ PromptMessageRole,
12
+ PromptMessageTool,
13
+ SystemPromptMessage,
14
+ TextPromptMessageContent,
15
+ ToolPromptMessage,
16
+ UserPromptMessage,
17
+ VideoPromptMessageContent,
18
+ )
19
+ from .model_entities import ModelPropertyKey
20
+
21
+ __all__ = [
22
+ "AssistantPromptMessage",
23
+ "AudioPromptMessageContent",
24
+ "DocumentPromptMessageContent",
25
+ "ImagePromptMessageContent",
26
+ "LLMMode",
27
+ "LLMResult",
28
+ "LLMResultChunk",
29
+ "LLMResultChunkDelta",
30
+ "LLMUsage",
31
+ "ModelPropertyKey",
32
+ "MultiModalPromptMessageContent",
33
+ "PromptMessage",
34
+ "PromptMessage",
35
+ "PromptMessageContent",
36
+ "PromptMessageContentType",
37
+ "PromptMessageRole",
38
+ "PromptMessageRole",
39
+ "PromptMessageTool",
40
+ "SystemPromptMessage",
41
+ "TextPromptMessageContent",
42
+ "ToolPromptMessage",
43
+ "UserPromptMessage",
44
+ "VideoPromptMessageContent",
45
+ ]
api/core/model_runtime/entities/common_entities.py ADDED
@@ -0,0 +1,17 @@
1
+ from typing import Optional
2
+
3
+ from pydantic import BaseModel
4
+
5
+
6
+ class I18nObject(BaseModel):
7
+ """
8
+ Model class for i18n object.
9
+ """
10
+
11
+ zh_Hans: Optional[str] = None
12
+ en_US: str
13
+
14
+ def __init__(self, **data):
15
+ super().__init__(**data)
16
+ if not self.zh_Hans:
17
+ self.zh_Hans = self.en_US
api/core/model_runtime/entities/defaults.py ADDED
@@ -0,0 +1,130 @@
1
+ from core.model_runtime.entities.model_entities import DefaultParameterName
2
+
3
+ PARAMETER_RULE_TEMPLATE: dict[DefaultParameterName, dict] = {
4
+ DefaultParameterName.TEMPERATURE: {
5
+ "label": {
6
+ "en_US": "Temperature",
7
+ "zh_Hans": "温度",
8
+ },
9
+ "type": "float",
10
+ "help": {
11
+ "en_US": "Controls randomness. Lower temperature results in less random completions."
12
+ " As the temperature approaches zero, the model will become deterministic and repetitive."
13
+ " Higher temperature results in more random completions.",
14
+ "zh_Hans": "温度控制随机性。较低的温度会导致较少的随机完成。随着温度接近零,模型将变得确定性和重复性。"
15
+ "较高的温度会导致更多的随机完成。",
16
+ },
17
+ "required": False,
18
+ "default": 0.0,
19
+ "min": 0.0,
20
+ "max": 1.0,
21
+ "precision": 2,
22
+ },
23
+ DefaultParameterName.TOP_P: {
24
+ "label": {
25
+ "en_US": "Top P",
26
+ "zh_Hans": "Top P",
27
+ },
28
+ "type": "float",
29
+ "help": {
30
+ "en_US": "Controls diversity via nucleus sampling: 0.5 means half of all likelihood-weighted options"
31
+ " are considered.",
32
+ "zh_Hans": "通过核心采样控制多样性:0.5表示考虑了一半的所有可能性加权选项。",
33
+ },
34
+ "required": False,
35
+ "default": 1.0,
36
+ "min": 0.0,
37
+ "max": 1.0,
38
+ "precision": 2,
39
+ },
40
+ DefaultParameterName.TOP_K: {
41
+ "label": {
42
+ "en_US": "Top K",
43
+ "zh_Hans": "Top K",
44
+ },
45
+ "type": "int",
46
+ "help": {
47
+ "en_US": "Limits the number of tokens to consider for each step by keeping only the k most likely tokens.",
48
+ "zh_Hans": "通过只保留每一步中最可能的 k 个标记来限制要考虑的标记数量。",
49
+ },
50
+ "required": False,
51
+ "default": 50,
52
+ "min": 1,
53
+ "max": 100,
54
+ "precision": 0,
55
+ },
56
+ DefaultParameterName.PRESENCE_PENALTY: {
57
+ "label": {
58
+ "en_US": "Presence Penalty",
59
+ "zh_Hans": "存在惩罚",
60
+ },
61
+ "type": "float",
62
+ "help": {
63
+ "en_US": "Applies a penalty to the log-probability of tokens already in the text.",
64
+ "zh_Hans": "对文本中已有的标记的对数概率施加惩罚。",
65
+ },
66
+ "required": False,
67
+ "default": 0.0,
68
+ "min": 0.0,
69
+ "max": 1.0,
70
+ "precision": 2,
71
+ },
72
+ DefaultParameterName.FREQUENCY_PENALTY: {
73
+ "label": {
74
+ "en_US": "Frequency Penalty",
75
+ "zh_Hans": "频率惩罚",
76
+ },
77
+ "type": "float",
78
+ "help": {
79
+ "en_US": "Applies a penalty to the log-probability of tokens that appear in the text.",
80
+ "zh_Hans": "对文本中出现的标记的对数概率施加惩罚。",
81
+ },
82
+ "required": False,
83
+ "default": 0.0,
84
+ "min": 0.0,
85
+ "max": 1.0,
86
+ "precision": 2,
87
+ },
88
+ DefaultParameterName.MAX_TOKENS: {
89
+ "label": {
90
+ "en_US": "Max Tokens",
91
+ "zh_Hans": "最大标记",
92
+ },
93
+ "type": "int",
94
+ "help": {
95
+ "en_US": "Specifies the upper limit on the length of generated results."
96
+ " If the generated results are truncated, you can increase this parameter.",
97
+ "zh_Hans": "指定生成结果长度的上限。如果生成结果截断,可以调大该参数。",
98
+ },
99
+ "required": False,
100
+ "default": 64,
101
+ "min": 1,
102
+ "max": 2048,
103
+ "precision": 0,
104
+ },
105
+ DefaultParameterName.RESPONSE_FORMAT: {
106
+ "label": {
107
+ "en_US": "Response Format",
108
+ "zh_Hans": "回复格式",
109
+ },
110
+ "type": "string",
111
+ "help": {
112
+ "en_US": "Set a response format, ensure the output from llm is a valid code block as possible,"
113
+ " such as JSON, XML, etc.",
114
+ "zh_Hans": "设置一个返回格式,确保llm的输出尽可能是有效的代码块,如JSON、XML等",
115
+ },
116
+ "required": False,
117
+ "options": ["JSON", "XML"],
118
+ },
119
+ DefaultParameterName.JSON_SCHEMA: {
120
+ "label": {
121
+ "en_US": "JSON Schema",
122
+ },
123
+ "type": "text",
124
+ "help": {
125
+ "en_US": "Set a response json schema will ensure LLM to adhere it.",
126
+ "zh_Hans": "设置返回的json schema,llm将按照它返回",
127
+ },
128
+ "required": False,
129
+ },
130
+ }
api/core/model_runtime/entities/llm_entities.py ADDED
@@ -0,0 +1,143 @@
1
+ from decimal import Decimal
2
+ from enum import StrEnum
3
+ from typing import Optional
4
+
5
+ from pydantic import BaseModel
6
+
7
+ from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage
8
+ from core.model_runtime.entities.model_entities import ModelUsage, PriceInfo
9
+
10
+
11
+ class LLMMode(StrEnum):
12
+ """
13
+ Enum class for large language model mode.
14
+ """
15
+
16
+ COMPLETION = "completion"
17
+ CHAT = "chat"
18
+
19
+ @classmethod
20
+ def value_of(cls, value: str) -> "LLMMode":
21
+ """
22
+ Get value of given mode.
23
+
24
+ :param value: mode value
25
+ :return: mode
26
+ """
27
+ for mode in cls:
28
+ if mode.value == value:
29
+ return mode
30
+ raise ValueError(f"invalid mode value {value}")
31
+
32
+
33
+ class LLMUsage(ModelUsage):
34
+ """
35
+ Model class for llm usage.
36
+ """
37
+
38
+ prompt_tokens: int
39
+ prompt_unit_price: Decimal
40
+ prompt_price_unit: Decimal
41
+ prompt_price: Decimal
42
+ completion_tokens: int
43
+ completion_unit_price: Decimal
44
+ completion_price_unit: Decimal
45
+ completion_price: Decimal
46
+ total_tokens: int
47
+ total_price: Decimal
48
+ currency: str
49
+ latency: float
50
+
51
+ @classmethod
52
+ def empty_usage(cls):
53
+ return cls(
54
+ prompt_tokens=0,
55
+ prompt_unit_price=Decimal("0.0"),
56
+ prompt_price_unit=Decimal("0.0"),
57
+ prompt_price=Decimal("0.0"),
58
+ completion_tokens=0,
59
+ completion_unit_price=Decimal("0.0"),
60
+ completion_price_unit=Decimal("0.0"),
61
+ completion_price=Decimal("0.0"),
62
+ total_tokens=0,
63
+ total_price=Decimal("0.0"),
64
+ currency="USD",
65
+ latency=0.0,
66
+ )
67
+
68
+ def plus(self, other: "LLMUsage") -> "LLMUsage":
69
+ """
70
+ Add two LLMUsage instances together.
71
+
72
+ :param other: Another LLMUsage instance to add
73
+ :return: A new LLMUsage instance with summed values
74
+ """
75
+ if self.total_tokens == 0:
76
+ return other
77
+ else:
78
+ return LLMUsage(
79
+ prompt_tokens=self.prompt_tokens + other.prompt_tokens,
80
+ prompt_unit_price=other.prompt_unit_price,
81
+ prompt_price_unit=other.prompt_price_unit,
82
+ prompt_price=self.prompt_price + other.prompt_price,
83
+ completion_tokens=self.completion_tokens + other.completion_tokens,
84
+ completion_unit_price=other.completion_unit_price,
85
+ completion_price_unit=other.completion_price_unit,
86
+ completion_price=self.completion_price + other.completion_price,
87
+ total_tokens=self.total_tokens + other.total_tokens,
88
+ total_price=self.total_price + other.total_price,
89
+ currency=other.currency,
90
+ latency=self.latency + other.latency,
91
+ )
92
+
93
+ def __add__(self, other: "LLMUsage") -> "LLMUsage":
94
+ """
95
+ Overload the + operator to add two LLMUsage instances.
96
+
97
+ :param other: Another LLMUsage instance to add
98
+ :return: A new LLMUsage instance with summed values
99
+ """
100
+ return self.plus(other)
101
+
102
+
103
+ class LLMResult(BaseModel):
104
+ """
105
+ Model class for llm result.
106
+ """
107
+
108
+ id: Optional[str] = None
109
+ model: str
110
+ prompt_messages: list[PromptMessage]
111
+ message: AssistantPromptMessage
112
+ usage: LLMUsage
113
+ system_fingerprint: Optional[str] = None
114
+
115
+
116
+ class LLMResultChunkDelta(BaseModel):
117
+ """
118
+ Model class for llm result chunk delta.
119
+ """
120
+
121
+ index: int
122
+ message: AssistantPromptMessage
123
+ usage: Optional[LLMUsage] = None
124
+ finish_reason: Optional[str] = None
125
+
126
+
127
+ class LLMResultChunk(BaseModel):
128
+ """
129
+ Model class for llm result chunk.
130
+ """
131
+
132
+ model: str
133
+ prompt_messages: list[PromptMessage]
134
+ system_fingerprint: Optional[str] = None
135
+ delta: LLMResultChunkDelta
136
+
137
+
138
+ class NumTokensResult(PriceInfo):
139
+ """
140
+ Model class for number of tokens result.
141
+ """
142
+
143
+ tokens: int
api/core/model_runtime/entities/message_entities.py ADDED
@@ -0,0 +1,218 @@
1
+ from abc import ABC
2
+ from collections.abc import Sequence
3
+ from enum import Enum, StrEnum
4
+ from typing import Optional
5
+
6
+ from pydantic import BaseModel, Field, field_validator
7
+
8
+
9
+ class PromptMessageRole(Enum):
10
+ """
11
+ Enum class for prompt message.
12
+ """
13
+
14
+ SYSTEM = "system"
15
+ USER = "user"
16
+ ASSISTANT = "assistant"
17
+ TOOL = "tool"
18
+
19
+ @classmethod
20
+ def value_of(cls, value: str) -> "PromptMessageRole":
21
+ """
22
+ Get value of given mode.
23
+
24
+ :param value: mode value
25
+ :return: mode
26
+ """
27
+ for mode in cls:
28
+ if mode.value == value:
29
+ return mode
30
+ raise ValueError(f"invalid prompt message type value {value}")
31
+
32
+
33
+ class PromptMessageTool(BaseModel):
34
+ """
35
+ Model class for prompt message tool.
36
+ """
37
+
38
+ name: str
39
+ description: str
40
+ parameters: dict
41
+
42
+
43
+ class PromptMessageFunction(BaseModel):
44
+ """
45
+ Model class for prompt message function.
46
+ """
47
+
48
+ type: str = "function"
49
+ function: PromptMessageTool
50
+
51
+
52
+ class PromptMessageContentType(StrEnum):
53
+ """
54
+ Enum class for prompt message content type.
55
+ """
56
+
57
+ TEXT = "text"
58
+ IMAGE = "image"
59
+ AUDIO = "audio"
60
+ VIDEO = "video"
61
+ DOCUMENT = "document"
62
+
63
+
64
+ class PromptMessageContent(BaseModel):
65
+ """
66
+ Model class for prompt message content.
67
+ """
68
+
69
+ type: PromptMessageContentType
70
+
71
+
72
+ class TextPromptMessageContent(PromptMessageContent):
73
+ """
74
+ Model class for text prompt message content.
75
+ """
76
+
77
+ type: PromptMessageContentType = PromptMessageContentType.TEXT
78
+ data: str
79
+
80
+
81
+ class MultiModalPromptMessageContent(PromptMessageContent):
82
+ """
83
+ Model class for multi-modal prompt message content.
84
+ """
85
+
86
+ type: PromptMessageContentType
87
+ format: str = Field(default=..., description="the format of multi-modal file")
88
+ base64_data: str = Field(default="", description="the base64 data of multi-modal file")
89
+ url: str = Field(default="", description="the url of multi-modal file")
90
+ mime_type: str = Field(default=..., description="the mime type of multi-modal file")
91
+
92
+ @property
93
+ def data(self):
94
+ return self.url or f"data:{self.mime_type};base64,{self.base64_data}"
95
+
96
+
97
+ class VideoPromptMessageContent(MultiModalPromptMessageContent):
98
+ type: PromptMessageContentType = PromptMessageContentType.VIDEO
99
+
100
+
101
+ class AudioPromptMessageContent(MultiModalPromptMessageContent):
102
+ type: PromptMessageContentType = PromptMessageContentType.AUDIO
103
+
104
+
105
+ class ImagePromptMessageContent(MultiModalPromptMessageContent):
106
+ """
107
+ Model class for image prompt message content.
108
+ """
109
+
110
+ class DETAIL(StrEnum):
111
+ LOW = "low"
112
+ HIGH = "high"
113
+
114
+ type: PromptMessageContentType = PromptMessageContentType.IMAGE
115
+ detail: DETAIL = DETAIL.LOW
116
+
117
+
118
+ class DocumentPromptMessageContent(MultiModalPromptMessageContent):
119
+ type: PromptMessageContentType = PromptMessageContentType.DOCUMENT
120
+
121
+
122
+ class PromptMessage(ABC, BaseModel):
123
+ """
124
+ Model class for prompt message.
125
+ """
126
+
127
+ role: PromptMessageRole
128
+ content: Optional[str | Sequence[PromptMessageContent]] = None
129
+ name: Optional[str] = None
130
+
131
+ def is_empty(self) -> bool:
132
+ """
133
+ Check if prompt message is empty.
134
+
135
+ :return: True if prompt message is empty, False otherwise
136
+ """
137
+ return not self.content
138
+
139
+
140
+ class UserPromptMessage(PromptMessage):
141
+ """
142
+ Model class for user prompt message.
143
+ """
144
+
145
+ role: PromptMessageRole = PromptMessageRole.USER
146
+
147
+
148
+ class AssistantPromptMessage(PromptMessage):
149
+ """
150
+ Model class for assistant prompt message.
151
+ """
152
+
153
+ class ToolCall(BaseModel):
154
+ """
155
+ Model class for assistant prompt message tool call.
156
+ """
157
+
158
+ class ToolCallFunction(BaseModel):
159
+ """
160
+ Model class for assistant prompt message tool call function.
161
+ """
162
+
163
+ name: str
164
+ arguments: str
165
+
166
+ id: str
167
+ type: str
168
+ function: ToolCallFunction
169
+
170
+ @field_validator("id", mode="before")
171
+ @classmethod
172
+ def transform_id_to_str(cls, value) -> str:
173
+ if not isinstance(value, str):
174
+ return str(value)
175
+ else:
176
+ return value
177
+
178
+ role: PromptMessageRole = PromptMessageRole.ASSISTANT
179
+ tool_calls: list[ToolCall] = []
180
+
181
+ def is_empty(self) -> bool:
182
+ """
183
+ Check if prompt message is empty.
184
+
185
+ :return: True if prompt message is empty, False otherwise
186
+ """
187
+ # A message carrying tool calls is not empty even if its content is blank
+ return super().is_empty() and not self.tool_calls
191
+
192
+
193
+ class SystemPromptMessage(PromptMessage):
194
+ """
195
+ Model class for system prompt message.
196
+ """
197
+
198
+ role: PromptMessageRole = PromptMessageRole.SYSTEM
199
+
200
+
201
+ class ToolPromptMessage(PromptMessage):
202
+ """
203
+ Model class for tool prompt message.
204
+ """
205
+
206
+ role: PromptMessageRole = PromptMessageRole.TOOL
207
+ tool_call_id: str
208
+
209
+ def is_empty(self) -> bool:
210
+ """
211
+ Check if prompt message is empty.
212
+
213
+ :return: True if prompt message is empty, False otherwise
214
+ """
215
+ # A tool message is not empty if it carries content or a tool_call_id
+ return super().is_empty() and not self.tool_call_id
api/core/model_runtime/entities/model_entities.py ADDED
@@ -0,0 +1,227 @@
1
+ from decimal import Decimal
2
+ from enum import Enum, StrEnum
3
+ from typing import Any, Optional
4
+
5
+ from pydantic import BaseModel, ConfigDict
6
+
7
+ from core.model_runtime.entities.common_entities import I18nObject
8
+
9
+
10
+ class ModelType(Enum):
11
+ """
12
+ Enum class for model type.
13
+ """
14
+
15
+ LLM = "llm"
16
+ TEXT_EMBEDDING = "text-embedding"
17
+ RERANK = "rerank"
18
+ SPEECH2TEXT = "speech2text"
19
+ MODERATION = "moderation"
20
+ TTS = "tts"
21
+ TEXT2IMG = "text2img"
22
+
23
+ @classmethod
24
+ def value_of(cls, origin_model_type: str) -> "ModelType":
25
+ """
26
+ Get model type from origin model type.
27
+
28
+ :return: model type
29
+ """
30
+ if origin_model_type in {"text-generation", cls.LLM.value}:
31
+ return cls.LLM
32
+ elif origin_model_type in {"embeddings", cls.TEXT_EMBEDDING.value}:
33
+ return cls.TEXT_EMBEDDING
34
+ elif origin_model_type in {"reranking", cls.RERANK.value}:
35
+ return cls.RERANK
36
+ elif origin_model_type in {"speech2text", cls.SPEECH2TEXT.value}:
37
+ return cls.SPEECH2TEXT
38
+ elif origin_model_type in {"tts", cls.TTS.value}:
39
+ return cls.TTS
40
+ elif origin_model_type in {"text2img", cls.TEXT2IMG.value}:
41
+ return cls.TEXT2IMG
42
+ elif origin_model_type == cls.MODERATION.value:
43
+ return cls.MODERATION
44
+ else:
45
+ raise ValueError(f"invalid origin model type {origin_model_type}")
46
+
47
+ def to_origin_model_type(self) -> str:
48
+ """
49
+ Get origin model type from model type.
50
+
51
+ :return: origin model type
52
+ """
53
+ if self == self.LLM:
54
+ return "text-generation"
55
+ elif self == self.TEXT_EMBEDDING:
56
+ return "embeddings"
57
+ elif self == self.RERANK:
58
+ return "reranking"
59
+ elif self == self.SPEECH2TEXT:
60
+ return "speech2text"
61
+ elif self == self.TTS:
62
+ return "tts"
63
+ elif self == self.MODERATION:
64
+ return "moderation"
65
+ elif self == self.TEXT2IMG:
66
+ return "text2img"
67
+ else:
68
+ raise ValueError(f"invalid model type {self}")
69
+
70
+
71
+ class FetchFrom(Enum):
72
+ """
73
+ Enum class for fetch from.
74
+ """
75
+
76
+ PREDEFINED_MODEL = "predefined-model"
77
+ CUSTOMIZABLE_MODEL = "customizable-model"
78
+
79
+
80
+ class ModelFeature(Enum):
81
+ """
82
+ Enum class for llm feature.
83
+ """
84
+
85
+ TOOL_CALL = "tool-call"
86
+ MULTI_TOOL_CALL = "multi-tool-call"
87
+ AGENT_THOUGHT = "agent-thought"
88
+ VISION = "vision"
89
+ STREAM_TOOL_CALL = "stream-tool-call"
90
+ DOCUMENT = "document"
91
+ VIDEO = "video"
92
+ AUDIO = "audio"
93
+
94
+
95
+ class DefaultParameterName(StrEnum):
96
+ """
97
+ Enum class for parameter template variable.
98
+ """
99
+
100
+ TEMPERATURE = "temperature"
101
+ TOP_P = "top_p"
102
+ TOP_K = "top_k"
103
+ PRESENCE_PENALTY = "presence_penalty"
104
+ FREQUENCY_PENALTY = "frequency_penalty"
105
+ MAX_TOKENS = "max_tokens"
106
+ RESPONSE_FORMAT = "response_format"
107
+ JSON_SCHEMA = "json_schema"
108
+
109
+ @classmethod
110
+ def value_of(cls, value: Any) -> "DefaultParameterName":
111
+ """
112
+ Get parameter name from value.
113
+
114
+ :param value: parameter value
115
+ :return: parameter name
116
+ """
117
+ for name in cls:
118
+ if name.value == value:
119
+ return name
120
+ raise ValueError(f"invalid parameter name {value}")
121
+
122
+
123
+ class ParameterType(Enum):
124
+ """
125
+ Enum class for parameter type.
126
+ """
127
+
128
+ FLOAT = "float"
129
+ INT = "int"
130
+ STRING = "string"
131
+ BOOLEAN = "boolean"
132
+ TEXT = "text"
133
+
134
+
135
+ class ModelPropertyKey(Enum):
136
+ """
137
+ Enum class for model property key.
138
+ """
139
+
140
+ MODE = "mode"
141
+ CONTEXT_SIZE = "context_size"
142
+ MAX_CHUNKS = "max_chunks"
143
+ FILE_UPLOAD_LIMIT = "file_upload_limit"
144
+ SUPPORTED_FILE_EXTENSIONS = "supported_file_extensions"
145
+ MAX_CHARACTERS_PER_CHUNK = "max_characters_per_chunk"
146
+ DEFAULT_VOICE = "default_voice"
147
+ VOICES = "voices"
148
+ WORD_LIMIT = "word_limit"
149
+ AUDIO_TYPE = "audio_type"
150
+ MAX_WORKERS = "max_workers"
151
+
152
+
153
+ class ProviderModel(BaseModel):
154
+ """
155
+ Model class for provider model.
156
+ """
157
+
158
+ model: str
159
+ label: I18nObject
160
+ model_type: ModelType
161
+ features: Optional[list[ModelFeature]] = None
162
+ fetch_from: FetchFrom
163
+ model_properties: dict[ModelPropertyKey, Any]
164
+ deprecated: bool = False
165
+ model_config = ConfigDict(protected_namespaces=())
166
+
167
+
168
+ class ParameterRule(BaseModel):
169
+ """
170
+ Model class for parameter rule.
171
+ """
172
+
173
+ name: str
174
+ use_template: Optional[str] = None
175
+ label: I18nObject
176
+ type: ParameterType
177
+ help: Optional[I18nObject] = None
178
+ required: bool = False
179
+ default: Optional[Any] = None
180
+ min: Optional[float] = None
181
+ max: Optional[float] = None
182
+ precision: Optional[int] = None
183
+ options: list[str] = []
184
+
185
+
186
+ class PriceConfig(BaseModel):
187
+ """
188
+ Model class for pricing info.
189
+ """
190
+
191
+ input: Decimal
192
+ output: Optional[Decimal] = None
193
+ unit: Decimal
194
+ currency: str
195
+
196
+
197
+ class AIModelEntity(ProviderModel):
198
+ """
199
+ Model class for AI model.
200
+ """
201
+
202
+ parameter_rules: list[ParameterRule] = []
203
+ pricing: Optional[PriceConfig] = None
204
+
205
+
206
+ class ModelUsage(BaseModel):
207
+ pass
208
+
209
+
210
+ class PriceType(Enum):
211
+ """
212
+ Enum class for price type.
213
+ """
214
+
215
+ INPUT = "input"
216
+ OUTPUT = "output"
217
+
218
+
219
+ class PriceInfo(BaseModel):
220
+ """
221
+ Model class for price info.
222
+ """
223
+
224
+ unit_price: Decimal
225
+ unit: Decimal
226
+ total_amount: Decimal
227
+ currency: str
api/core/model_runtime/entities/provider_entities.py ADDED
@@ -0,0 +1,159 @@
1
+ from collections.abc import Sequence
2
+ from enum import Enum
3
+ from typing import Optional
4
+
5
+ from pydantic import BaseModel, ConfigDict
6
+
7
+ from core.model_runtime.entities.common_entities import I18nObject
8
+ from core.model_runtime.entities.model_entities import ModelType, ProviderModel
9
+
10
+
11
+ class ConfigurateMethod(Enum):
12
+ """
13
+ Enum class for configurate method of provider model.
14
+ """
15
+
16
+ PREDEFINED_MODEL = "predefined-model"
17
+ CUSTOMIZABLE_MODEL = "customizable-model"
18
+
19
+
20
+ class FormType(Enum):
21
+ """
22
+ Enum class for form type.
23
+ """
24
+
25
+ TEXT_INPUT = "text-input"
26
+ SECRET_INPUT = "secret-input"
27
+ SELECT = "select"
28
+ RADIO = "radio"
29
+ SWITCH = "switch"
30
+
31
+
32
+ class FormShowOnObject(BaseModel):
33
+ """
34
+ Model class for form show on.
35
+ """
36
+
37
+ variable: str
38
+ value: str
39
+
40
+
41
+ class FormOption(BaseModel):
42
+ """
43
+ Model class for form option.
44
+ """
45
+
46
+ label: I18nObject
47
+ value: str
48
+ show_on: list[FormShowOnObject] = []
49
+
50
+ def __init__(self, **data):
51
+ super().__init__(**data)
52
+ if not self.label:
53
+ self.label = I18nObject(en_US=self.value)
54
+
55
+
56
+ class CredentialFormSchema(BaseModel):
57
+ """
58
+ Model class for credential form schema.
59
+ """
60
+
61
+ variable: str
62
+ label: I18nObject
63
+ type: FormType
64
+ required: bool = True
65
+ default: Optional[str] = None
66
+ options: Optional[list[FormOption]] = None
67
+ placeholder: Optional[I18nObject] = None
68
+ max_length: int = 0
69
+ show_on: list[FormShowOnObject] = []
70
+
71
+
72
+ class ProviderCredentialSchema(BaseModel):
73
+ """
74
+ Model class for provider credential schema.
75
+ """
76
+
77
+ credential_form_schemas: list[CredentialFormSchema]
78
+
79
+
80
+ class FieldModelSchema(BaseModel):
81
+ label: I18nObject
82
+ placeholder: Optional[I18nObject] = None
83
+
84
+
85
+ class ModelCredentialSchema(BaseModel):
86
+ """
87
+ Model class for model credential schema.
88
+ """
89
+
90
+ model: FieldModelSchema
91
+ credential_form_schemas: list[CredentialFormSchema]
92
+
93
+
94
+ class SimpleProviderEntity(BaseModel):
95
+ """
96
+ Simple model class for provider.
97
+ """
98
+
99
+ provider: str
100
+ label: I18nObject
101
+ icon_small: Optional[I18nObject] = None
102
+ icon_large: Optional[I18nObject] = None
103
+ supported_model_types: Sequence[ModelType]
104
+ models: list[ProviderModel] = []
105
+
106
+
107
+ class ProviderHelpEntity(BaseModel):
108
+ """
109
+ Model class for provider help.
110
+ """
111
+
112
+ title: I18nObject
113
+ url: I18nObject
114
+
115
+
116
+ class ProviderEntity(BaseModel):
117
+ """
118
+ Model class for provider.
119
+ """
120
+
121
+ provider: str
122
+ label: I18nObject
123
+ description: Optional[I18nObject] = None
124
+ icon_small: Optional[I18nObject] = None
125
+ icon_large: Optional[I18nObject] = None
126
+ background: Optional[str] = None
127
+ help: Optional[ProviderHelpEntity] = None
128
+ supported_model_types: Sequence[ModelType]
129
+ configurate_methods: list[ConfigurateMethod]
130
+ models: list[ProviderModel] = []
131
+ provider_credential_schema: Optional[ProviderCredentialSchema] = None
132
+ model_credential_schema: Optional[ModelCredentialSchema] = None
133
+
134
+ # pydantic configs
135
+ model_config = ConfigDict(protected_namespaces=())
136
+
137
+ def to_simple_provider(self) -> SimpleProviderEntity:
138
+ """
139
+ Convert to simple provider.
140
+
141
+ :return: simple provider
142
+ """
143
+ return SimpleProviderEntity(
144
+ provider=self.provider,
145
+ label=self.label,
146
+ icon_small=self.icon_small,
147
+ icon_large=self.icon_large,
148
+ supported_model_types=self.supported_model_types,
149
+ models=self.models,
150
+ )
151
+
152
+
153
+ class ProviderConfig(BaseModel):
154
+ """
155
+ Model class for provider config.
156
+ """
157
+
158
+ provider: str
159
+ credentials: dict
api/core/model_runtime/entities/rerank_entities.py ADDED
@@ -0,0 +1,20 @@
1
+ from pydantic import BaseModel
2
+
3
+
4
+ class RerankDocument(BaseModel):
5
+ """
6
+ Model class for rerank document.
7
+ """
8
+
9
+ index: int
10
+ text: str
11
+ score: float
12
+
13
+
14
+ class RerankResult(BaseModel):
15
+ """
16
+ Model class for rerank result.
17
+ """
18
+
19
+ model: str
20
+ docs: list[RerankDocument]
api/core/model_runtime/entities/text_embedding_entities.py ADDED
@@ -0,0 +1,29 @@
1
+ from decimal import Decimal
2
+
3
+ from pydantic import BaseModel
4
+
5
+ from core.model_runtime.entities.model_entities import ModelUsage
6
+
7
+
8
+ class EmbeddingUsage(ModelUsage):
9
+ """
10
+ Model class for embedding usage.
11
+ """
12
+
13
+ tokens: int
14
+ total_tokens: int
15
+ unit_price: Decimal
16
+ price_unit: Decimal
17
+ total_price: Decimal
18
+ currency: str
19
+ latency: float
20
+
21
+
22
+ class TextEmbeddingResult(BaseModel):
23
+ """
24
+ Model class for text embedding result.
25
+ """
26
+
27
+ model: str
28
+ embeddings: list[list[float]]
29
+ usage: EmbeddingUsage
api/core/model_runtime/errors/__init__.py ADDED
File without changes
api/core/model_runtime/errors/invoke.py ADDED
@@ -0,0 +1,43 @@
1
+ from typing import Optional
2
+
3
+
4
+ class InvokeError(ValueError):
5
+ """Base class for all LLM exceptions."""
6
+
7
+ description: Optional[str] = None
8
+
9
+ def __init__(self, description: Optional[str] = None) -> None:
10
+ self.description = description
11
+
12
+ def __str__(self):
13
+ return self.description or self.__class__.__name__
14
+
15
+
16
+ class InvokeConnectionError(InvokeError):
17
+ """Raised when the Invoke returns connection error."""
18
+
19
+ description = "Connection Error"
20
+
21
+
22
+ class InvokeServerUnavailableError(InvokeError):
23
+ """Raised when the Invoke returns server unavailable error."""
24
+
25
+ description = "Server Unavailable Error"
26
+
27
+
28
+ class InvokeRateLimitError(InvokeError):
29
+ """Raised when the Invoke returns rate limit error."""
30
+
31
+ description = "Rate Limit Error"
32
+
33
+
34
+ class InvokeAuthorizationError(InvokeError):
35
+ """Raised when the Invoke returns authorization error."""
36
+
37
+ description = "Incorrect model credentials provided, please check and try again. "
38
+
39
+
40
+ class InvokeBadRequestError(InvokeError):
41
+ """Raised when the Invoke returns bad request."""
42
+
43
+ description = "Bad Request Error"
api/core/model_runtime/errors/validate.py ADDED
@@ -0,0 +1,6 @@
1
+ class CredentialsValidateFailedError(ValueError):
2
+ """
3
+ Credentials validate failed error
4
+ """
5
+
6
+ pass