feat(openai): added support for non-streaming o1 (e.g. Azure) models (#1687)

* feat(openai): added support for non-streaming o1 (e.g. Azure) models
* feat(docs): add non streaming example
* feat: moar docs

---------

Co-authored-by: Nathan Sarrazin <[email protected]>
README.md
CHANGED

````diff
@@ -482,6 +482,29 @@ MODELS=`[{
 }]`
 ```
 
+_Non-streaming endpoints_
+
+For endpoints that don't support streaming like o1 on Azure, you can pass `streamingSupported: false` in your endpoint config:
+
+```
+MODELS=`[{
+  "id": "o1-preview",
+  "name": "o1-preview",
+  "displayName": "o1-preview",
+  "systemRoleSupported": false,
+  "endpoints": [
+    {
+      "type": "openai",
+      "baseURL": "https://my-deployment.openai.azure.com/openai/deployments/o1-preview",
+      "defaultHeaders": {
+        "api-key": "$SECRET"
+      },
+      "streamingSupported": false,
+    }
+  ]
+}]`
+```
+
 ##### Llama.cpp API server
 
 chat-ui also supports the llama.cpp API server directly without the need for an adapter. You can do this using the `llamacpp` endpoint type.
````
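For context, here is a minimal sketch of the request this flag maps to, using the official `openai` npm client directly. The deployment URL, the `api-version` query value, and the `AZURE_API_KEY` variable are illustrative placeholders, not part of this commit:

```ts
import OpenAI from "openai";

const openai = new OpenAI({
	apiKey: "unused", // Azure authenticates via the api-key header instead
	baseURL: "https://my-deployment.openai.azure.com/openai/deployments/o1-preview",
	defaultHeaders: { "api-key": process.env.AZURE_API_KEY ?? "" },
	defaultQuery: { "api-version": "2024-08-01-preview" }, // assumed version
});

async function main() {
	// stream: false returns one complete ChatCompletion instead of an SSE
	// stream, which is what `streamingSupported: false` makes chat-ui request.
	const completion = await openai.chat.completions.create({
		model: "o1-preview",
		messages: [{ role: "user", content: "Hello!" }],
		stream: false,
	});
	console.log(completion.choices[0]?.message?.content);
}

main().catch(console.error);
```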
docs/source/configuration/models/providers/openai.md
CHANGED

````diff
@@ -146,9 +146,36 @@ MODELS=`[{
 }]`
 ```
 
+_Non-streaming endpoints_
+
+For endpoints that don't support streaming like o1 on Azure, you can pass `streamingSupported: false` in your endpoint config:
+
+```
+MODELS=`[{
+  "id": "o1-preview",
+  "name": "o1-preview",
+  "displayName": "o1-preview",
+  "systemRoleSupported": false,
+  "endpoints": [
+    {
+      "type": "openai",
+      "baseURL": "https://my-deployment.openai.azure.com/openai/deployments/o1-preview",
+      "defaultHeaders": {
+        "api-key": "$SECRET"
+      },
+      "streamingSupported": false,
+    }
+  ]
+}]`
+```
+
 ## Other
 
 Some other providers and their `baseURL` for reference.
 
 [Groq](https://groq.com/): https://api.groq.com/openai/v1
 [Fireworks](https://fireworks.ai/): https://api.fireworks.ai/inference/v1
+
+```
+
+```
````
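The trailing comma after `"streamingSupported": false` in the example is tolerated assuming chat-ui parses the `MODELS` value with JSON5 rather than strict JSON. A quick way to sanity-check a config string before putting it in `.env.local` (the standalone `json5` usage here is an illustration, not code from this commit):

```ts
import JSON5 from "json5";

// The value you would place between the backticks of MODELS=`...`
const modelsConfig = `[{
  "id": "o1-preview",
  "name": "o1-preview",
  "systemRoleSupported": false,
  "endpoints": [
    {
      "type": "openai",
      "baseURL": "https://my-deployment.openai.azure.com/openai/deployments/o1-preview",
      "streamingSupported": false,
    }
  ]
}]`;

// JSON5 accepts the trailing comma that strict JSON.parse would reject
const models = JSON5.parse(modelsConfig);
console.log(models[0].endpoints[0].streamingSupported); // false
```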
src/lib/server/endpoints/openai/endpointOai.ts
CHANGED

```diff
@@ -1,8 +1,12 @@
 import { z } from "zod";
 import { openAICompletionToTextGenerationStream } from "./openAICompletionToTextGenerationStream";
-import { openAIChatToTextGenerationStream } from "./openAIChatToTextGenerationStream";
+import {
+	openAIChatToTextGenerationSingle,
+	openAIChatToTextGenerationStream,
+} from "./openAIChatToTextGenerationStream";
 import type { CompletionCreateParamsStreaming } from "openai/resources/completions";
 import type {
+	ChatCompletionCreateParamsNonStreaming,
 	ChatCompletionCreateParamsStreaming,
 	ChatCompletionTool,
 } from "openai/resources/chat/completions";
@@ -113,6 +117,7 @@ export const endpointOAIParametersSchema = z.object({
 		.default({}),
 	/* enable use of max_completion_tokens in place of max_tokens */
 	useCompletionTokens: z.boolean().default(false),
+	streamingSupported: z.boolean().default(true),
 });
 
 export async function endpointOai(
@@ -128,6 +133,7 @@ export async function endpointOai(
 		multimodal,
 		extraBody,
 		useCompletionTokens,
+		streamingSupported,
 	} = endpointOAIParametersSchema.parse(input);
 
 	let OpenAI;
@@ -249,10 +255,10 @@ export async function endpointOai(
 
 		const parameters = { ...model.parameters, ...generateSettings };
 		const toolCallChoices = createChatCompletionToolsArray(tools);
-		const body: ChatCompletionCreateParamsStreaming = {
+		const body = {
 			model: model.id ?? model.name,
 			messages: messagesOpenAI,
-			stream: true,
+			stream: streamingSupported,
 			...(useCompletionTokens
 				? { max_completion_tokens: parameters?.max_new_tokens }
 				: { max_tokens: parameters?.max_new_tokens }),
@@ -264,15 +270,31 @@ export async function endpointOai(
 			...(toolCallChoices.length > 0 ? { tools: toolCallChoices, tool_choice: "auto" } : {}),
 		};
 
-		const openChatAICompletion = await openai.chat.completions.create(body, {
-			body: { ...body, ...extraBody },
-			headers: {
-				"ChatUI-Conversation-ID": conversationId?.toString() ?? "",
-				"X-use-cache": "false",
-			},
-		});
-
-		return openAIChatToTextGenerationStream(openChatAICompletion);
+		if (streamingSupported) {
+			const openChatAICompletion = await openai.chat.completions.create(
+				body as ChatCompletionCreateParamsStreaming,
+				{
+					body: { ...body, ...extraBody },
+					headers: {
+						"ChatUI-Conversation-ID": conversationId?.toString() ?? "",
+						"X-use-cache": "false",
+					},
+				}
+			);
+			return openAIChatToTextGenerationStream(openChatAICompletion);
+		} else {
+			const openChatAICompletion = await openai.chat.completions.create(
+				body as ChatCompletionCreateParamsNonStreaming,
+				{
+					body: { ...body, ...extraBody },
+					headers: {
+						"ChatUI-Conversation-ID": conversationId?.toString() ?? "",
+						"X-use-cache": "false",
+					},
+				}
+			);
+			return openAIChatToTextGenerationSingle(openChatAICompletion);
+		}
 	};
 } else {
 	throw new Error("Invalid completion type");
```
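The `as` casts exist because openai-node picks its return type (`Stream<ChatCompletionChunk>` vs. a plain `ChatCompletion`) from the literal type of `stream`, and the shared `body` above only carries `stream: boolean`. A self-contained sketch of that overload behavior (the model name and the standalone `generate` helper are illustrative, not part of this commit):

```ts
import OpenAI from "openai";
import type {
	ChatCompletionCreateParamsNonStreaming,
	ChatCompletionCreateParamsStreaming,
} from "openai/resources/chat/completions";

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

async function generate(streamingSupported: boolean) {
	// stream is typed as boolean here, so neither overload matches without a cast
	const body = {
		model: "gpt-4o-mini", // illustrative model name
		messages: [{ role: "user" as const, content: "Hi" }],
		stream: streamingSupported,
	};

	if (streamingSupported) {
		// With stream: true the SDK resolves to Stream<ChatCompletionChunk>
		const stream = await openai.chat.completions.create(
			body as ChatCompletionCreateParamsStreaming
		);
		for await (const chunk of stream) {
			process.stdout.write(chunk.choices[0]?.delta?.content ?? "");
		}
	} else {
		// With stream: false it resolves to a single ChatCompletion
		const completion = await openai.chat.completions.create(
			body as ChatCompletionCreateParamsNonStreaming
		);
		console.log(completion.choices[0]?.message?.content);
	}
}

generate(false).catch(console.error);
```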
src/lib/server/endpoints/openai/openAIChatToTextGenerationStream.ts
CHANGED

```diff
@@ -94,3 +94,25 @@ export async function* openAIChatToTextGenerationStream(
 		}
 	}
 }
+
+/**
+ * Transform a non-streaming OpenAI chat completion into a stream of TextGenerationStreamOutput
+ */
+export async function* openAIChatToTextGenerationSingle(
+	completion: OpenAI.Chat.Completions.ChatCompletion
+) {
+	const content = completion.choices[0]?.message?.content || "";
+	const tokenId = 0;
+
+	// Yield the content as a single token
+	yield {
+		token: {
+			id: tokenId,
+			text: content,
+			logprob: 0,
+			special: false,
+		},
+		generated_text: content,
+		details: null,
+	} as TextGenerationStreamOutput;
+}
```
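Because `openAIChatToTextGenerationSingle` is still an async generator, callers can consume it exactly like the streaming variant; it just yields once, with the full text as a single token. A usage sketch (the `run` harness and model name are illustrative, not from this commit):

```ts
import OpenAI from "openai";
import { openAIChatToTextGenerationSingle } from "./openAIChatToTextGenerationStream";

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

async function run() {
	// A non-streaming completion, as produced by the streamingSupported: false path
	const completion = await openai.chat.completions.create({
		model: "o1-preview", // illustrative; chat-ui takes this from the model config
		messages: [{ role: "user", content: "Hello" }],
		stream: false,
	});

	// The generator yields exactly once, carrying the whole reply
	for await (const output of openAIChatToTextGenerationSingle(completion)) {
		console.log(output.generated_text);
	}
}

run().catch(console.error);
```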
|