Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Thomas G. Lopes
committed on
Accurate max tokens (#77)
Browse files
- .env.example +9 -0
- .prettierignore +2 -0
- eslint.config.mts +1 -0
- package.json +7 -4
- pnpm-lock.yaml +54 -30
- scripts/update-ctx-length.ts +55 -0
- src/app.css +12 -0
- src/lib/components/inference-playground/generation-config-settings.ts +1 -2
- src/lib/components/inference-playground/generation-config.svelte +44 -21
- src/lib/components/inference-playground/playground.svelte +1 -15
- src/lib/components/inference-playground/utils.ts +36 -5
- src/lib/data/context_length.json +299 -0
- src/lib/server/providers/cohere.ts +35 -0
- src/lib/server/providers/fireworks.ts +41 -0
- src/lib/server/providers/hyperbolic.ts +41 -0
- src/lib/server/providers/index.ts +224 -0
- src/lib/server/providers/nebius.ts +49 -0
- src/lib/server/providers/novita.ts +46 -0
- src/lib/server/providers/replicate.ts +37 -0
- src/lib/server/providers/sambanova.ts +52 -0
- src/lib/server/providers/together.ts +37 -0
- src/lib/state/generation-stats.svelte.ts +31 -0
- src/lib/types.ts +1 -0
- src/lib/utils/is.ts +3 -0
- src/lib/utils/object.ts +9 -0
- src/routes/+layout.svelte +2 -0
.env.example
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
HYPERBOLIC_API_KEY=
|
| 2 |
+
COHERE_API_KEY=
|
| 3 |
+
TOGETHER_API_KEY=
|
| 4 |
+
FIREWORKS_API_KEY=
|
| 5 |
+
REPLICATE_API_KEY=
|
| 6 |
+
NEBIUS_API_KEY=
|
| 7 |
+
NOVITA_API_KEY=
|
| 8 |
+
FAL_API_KEY=
|
| 9 |
+
HF_TOKEN=
|
.prettierignore
CHANGED
|
@@ -16,3 +16,5 @@ node_modules
|
|
| 16 |
# Ignore files for PNPM, NPM and YARN
|
| 17 |
pnpm-lock.yaml
|
| 18 |
yarn.lock
|
|
|
|
|
|
|
|
|
| 16 |
# Ignore files for PNPM, NPM and YARN
|
| 17 |
pnpm-lock.yaml
|
| 18 |
yarn.lock
|
| 19 |
+
|
| 20 |
+
context_length.json
|
eslint.config.mts
CHANGED
|
@@ -86,6 +86,7 @@ export default ts.config(
|
|
| 86 |
"**/pnpm-lock.yaml",
|
| 87 |
"**/package-lock.json",
|
| 88 |
"**/yarn.lock",
|
|
|
|
| 89 |
],
|
| 90 |
},
|
| 91 |
{
|
|
|
|
| 86 |
"**/pnpm-lock.yaml",
|
| 87 |
"**/package-lock.json",
|
| 88 |
"**/yarn.lock",
|
| 89 |
+
"context_length.json",
|
| 90 |
],
|
| 91 |
},
|
| 92 |
{
|
package.json
CHANGED
|
@@ -3,24 +3,25 @@
|
|
| 3 |
"version": "0.0.1",
|
| 4 |
"private": true,
|
| 5 |
"scripts": {
|
| 6 |
-
"dev": "vite dev",
|
| 7 |
-
"build": "vite build",
|
| 8 |
"preview": "vite preview",
|
| 9 |
"prepare": "ts-patch install && svelte-kit sync || echo ''",
|
| 10 |
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
|
| 11 |
"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
|
| 12 |
"lint": "prettier . --check . && eslint src/",
|
| 13 |
"format": "prettier . --write .",
|
| 14 |
-
"clean": "rm -rf ./node_modules/ && rm -rf ./.svelte-kit/ && ni && echo 'Project cleaned!'"
|
|
|
|
| 15 |
},
|
| 16 |
"devDependencies": {
|
| 17 |
"@eslint/eslintrc": "^3.3.0",
|
| 18 |
"@eslint/js": "^9.22.0",
|
| 19 |
"@floating-ui/dom": "^1.6.13",
|
| 20 |
"@huggingface/hub": "^1.0.1",
|
| 21 |
-
"@huggingface/transformers": "^3.4.2",
|
| 22 |
"@huggingface/inference": "^3.5.1",
|
| 23 |
"@huggingface/tasks": "^0.17.1",
|
|
|
|
| 24 |
"@iconify-json/carbon": "^1.2.8",
|
| 25 |
"@iconify-json/material-symbols": "^1.2.15",
|
| 26 |
"@ryoppippi/unplugin-typia": "^1.0.0",
|
|
@@ -31,7 +32,9 @@
|
|
| 31 |
"@sveltejs/vite-plugin-svelte": "^4.0.0",
|
| 32 |
"@tailwindcss/container-queries": "^0.1.1",
|
| 33 |
"@tailwindcss/postcss": "^4.0.9",
|
|
|
|
| 34 |
"clsx": "^2.1.1",
|
|
|
|
| 35 |
"eslint": "^9.22.0",
|
| 36 |
"eslint-config-prettier": "^10.1.1",
|
| 37 |
"eslint-plugin-prettier": "^5.2.3",
|
|
|
|
| 3 |
"version": "0.0.1",
|
| 4 |
"private": true,
|
| 5 |
"scripts": {
|
| 6 |
+
"dev": "pnpm run update-ctx-length && vite dev",
|
| 7 |
+
"build": "pnpm run update-ctx-length && vite build",
|
| 8 |
"preview": "vite preview",
|
| 9 |
"prepare": "ts-patch install && svelte-kit sync || echo ''",
|
| 10 |
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
|
| 11 |
"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
|
| 12 |
"lint": "prettier . --check . && eslint src/",
|
| 13 |
"format": "prettier . --write .",
|
| 14 |
+
"clean": "rm -rf ./node_modules/ && rm -rf ./.svelte-kit/ && ni && echo 'Project cleaned!'",
|
| 15 |
+
"update-ctx-length": "jiti scripts/update-ctx-length.ts"
|
| 16 |
},
|
| 17 |
"devDependencies": {
|
| 18 |
"@eslint/eslintrc": "^3.3.0",
|
| 19 |
"@eslint/js": "^9.22.0",
|
| 20 |
"@floating-ui/dom": "^1.6.13",
|
| 21 |
"@huggingface/hub": "^1.0.1",
|
|
|
|
| 22 |
"@huggingface/inference": "^3.5.1",
|
| 23 |
"@huggingface/tasks": "^0.17.1",
|
| 24 |
+
"@huggingface/transformers": "^3.4.2",
|
| 25 |
"@iconify-json/carbon": "^1.2.8",
|
| 26 |
"@iconify-json/material-symbols": "^1.2.15",
|
| 27 |
"@ryoppippi/unplugin-typia": "^1.0.0",
|
|
|
|
| 32 |
"@sveltejs/vite-plugin-svelte": "^4.0.0",
|
| 33 |
"@tailwindcss/container-queries": "^0.1.1",
|
| 34 |
"@tailwindcss/postcss": "^4.0.9",
|
| 35 |
+
"@types/node": "^22.14.1",
|
| 36 |
"clsx": "^2.1.1",
|
| 37 |
+
"dotenv": "^16.5.0",
|
| 38 |
"eslint": "^9.22.0",
|
| 39 |
"eslint-config-prettier": "^10.1.1",
|
| 40 |
"eslint-plugin-prettier": "^5.2.3",
|
pnpm-lock.yaml
CHANGED
|
@@ -44,31 +44,37 @@ importers:
|
|
| 44 |
version: 1.2.15
|
| 45 |
'@ryoppippi/unplugin-typia':
|
| 46 |
specifier: ^1.0.0
|
| 47 |
-
version: 1.2.0(@samchon/[email protected])(@types/node@
|
| 48 |
'@samchon/openapi':
|
| 49 |
specifier: ^3.0.0
|
| 50 |
version: 3.0.0
|
| 51 |
'@sveltejs/adapter-auto':
|
| 52 |
specifier: ^3.2.2
|
| 53 |
-
version: 3.3.1(@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
| 54 |
'@sveltejs/adapter-node':
|
| 55 |
specifier: ^5.2.0
|
| 56 |
-
version: 5.2.12(@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
| 57 |
'@sveltejs/kit':
|
| 58 |
specifier: ^2.5.27
|
| 59 |
-
version: 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
| 60 |
'@sveltejs/vite-plugin-svelte':
|
| 61 |
specifier: ^4.0.0
|
| 62 |
-
version: 4.0.4([email protected])([email protected](@types/node@
|
| 63 |
'@tailwindcss/container-queries':
|
| 64 |
specifier: ^0.1.1
|
| 65 |
version: 0.1.1([email protected])
|
| 66 |
'@tailwindcss/postcss':
|
| 67 |
specifier: ^4.0.9
|
| 68 |
version: 4.0.9
|
|
|
|
|
|
|
|
|
|
| 69 |
clsx:
|
| 70 |
specifier: ^2.1.1
|
| 71 |
version: 2.1.1
|
|
|
|
|
|
|
|
|
|
| 72 |
eslint:
|
| 73 |
specifier: ^9.22.0
|
| 74 |
version: 9.22.0([email protected])
|
|
@@ -137,7 +143,7 @@ importers:
|
|
| 137 |
version: 22.1.0([email protected])
|
| 138 |
vite:
|
| 139 |
specifier: ^5.4.4
|
| 140 |
-
version: 5.4.14(@types/node@
|
| 141 |
|
| 142 |
packages:
|
| 143 |
|
|
@@ -1001,6 +1007,9 @@ packages:
|
|
| 1001 |
'@types/[email protected]':
|
| 1002 |
resolution: {integrity: sha512-ACYy2HGcZPHxEeWTqowTF7dhXN+JU1o7Gr4b41klnn6pj2LD6rsiGqSZojMdk1Jh2ys3m76ap+ae1vvE4+5+vg==}
|
| 1003 |
|
|
|
|
|
|
|
|
|
|
| 1004 |
'@types/[email protected]':
|
| 1005 |
resolution: {integrity: sha512-60BCwRFOZCQhDncwQdxxeOEEkbc5dIMccYLwbxsS4TUNeVECQ/pBJ0j09mrHOl/JJvpRPGwO9SvE4nR2Nb/a4Q==}
|
| 1006 |
|
|
@@ -1284,6 +1293,10 @@ packages:
|
|
| 1284 |
resolution: {integrity: sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==}
|
| 1285 |
engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0}
|
| 1286 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1287 | |
| 1288 |
resolution: {integrity: sha512-pYxfDYpued//QpnLIm4Avk7rsNtAtQkUES2cwAYSvD/wd2pKD71gN2Ebj3e7klzXwjocvE8c5vx/1fxwpqmSxA==}
|
| 1289 |
engines: {node: '>=4'}
|
|
@@ -2404,6 +2417,9 @@ packages:
|
|
| 2404 | |
| 2405 |
resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==}
|
| 2406 |
|
|
|
|
|
|
|
|
|
|
| 2407 | |
| 2408 |
resolution: {integrity: sha512-ect2ZNtk1Zgwb0NVHd0C1IDW/MV+Jk/xaq4t8o6rYdVS3+L660ZdD5kTSQZvsgdwCvquRw+/wYn75hsweRjoIA==}
|
| 2409 |
peerDependencies:
|
|
@@ -3087,7 +3103,7 @@ snapshots:
|
|
| 3087 |
'@rollup/[email protected]':
|
| 3088 |
optional: true
|
| 3089 |
|
| 3090 |
-
'@ryoppippi/[email protected](@samchon/[email protected])(@types/node@
|
| 3091 |
dependencies:
|
| 3092 |
'@rollup/pluginutils': 5.1.4([email protected])
|
| 3093 |
consola: 3.4.0
|
|
@@ -3101,7 +3117,7 @@ snapshots:
|
|
| 3101 |
typescript: 5.6.3
|
| 3102 |
typia: 7.6.4(@samchon/[email protected])([email protected])
|
| 3103 |
unplugin: 1.16.1
|
| 3104 |
-
vite: 6.2.1(@types/node@
|
| 3105 |
transitivePeerDependencies:
|
| 3106 |
- '@samchon/openapi'
|
| 3107 |
- '@types/node'
|
|
@@ -3125,22 +3141,22 @@ snapshots:
|
|
| 3125 |
dependencies:
|
| 3126 |
acorn: 8.14.0
|
| 3127 |
|
| 3128 |
-
'@sveltejs/[email protected](@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
| 3129 |
dependencies:
|
| 3130 |
-
'@sveltejs/kit': 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
| 3131 |
import-meta-resolve: 4.1.0
|
| 3132 |
|
| 3133 |
-
'@sveltejs/[email protected](@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
| 3134 |
dependencies:
|
| 3135 |
'@rollup/plugin-commonjs': 28.0.2([email protected])
|
| 3136 |
'@rollup/plugin-json': 6.1.0([email protected])
|
| 3137 |
'@rollup/plugin-node-resolve': 16.0.0([email protected])
|
| 3138 |
-
'@sveltejs/kit': 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
| 3139 |
rollup: 4.34.9
|
| 3140 |
|
| 3141 |
-
'@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
| 3142 |
dependencies:
|
| 3143 |
-
'@sveltejs/vite-plugin-svelte': 4.0.4([email protected])([email protected](@types/node@
|
| 3144 |
'@types/cookie': 0.6.0
|
| 3145 |
cookie: 0.6.0
|
| 3146 |
devalue: 5.1.1
|
|
@@ -3153,27 +3169,27 @@ snapshots:
|
|
| 3153 |
set-cookie-parser: 2.7.1
|
| 3154 |
sirv: 3.0.1
|
| 3155 |
svelte: 5.28.2
|
| 3156 |
-
vite: 5.4.14(@types/node@
|
| 3157 |
|
| 3158 |
-
'@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
| 3159 |
dependencies:
|
| 3160 |
-
'@sveltejs/vite-plugin-svelte': 4.0.4([email protected])([email protected](@types/node@
|
| 3161 |
debug: 4.4.0
|
| 3162 |
svelte: 5.28.2
|
| 3163 |
-
vite: 5.4.14(@types/node@
|
| 3164 |
transitivePeerDependencies:
|
| 3165 |
- supports-color
|
| 3166 |
|
| 3167 |
-
'@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
| 3168 |
dependencies:
|
| 3169 |
-
'@sveltejs/vite-plugin-svelte-inspector': 3.0.1(@sveltejs/[email protected]([email protected])([email protected](@types/node@
|
| 3170 |
debug: 4.4.0
|
| 3171 |
deepmerge: 4.3.1
|
| 3172 |
kleur: 4.1.5
|
| 3173 |
magic-string: 0.30.17
|
| 3174 |
svelte: 5.28.2
|
| 3175 |
-
vite: 5.4.14(@types/node@
|
| 3176 |
-
vitefu: 1.0.6([email protected](@types/node@
|
| 3177 |
transitivePeerDependencies:
|
| 3178 |
- supports-color
|
| 3179 |
|
|
@@ -3251,13 +3267,17 @@ snapshots:
|
|
| 3251 |
|
| 3252 |
'@types/[email protected]':
|
| 3253 |
dependencies:
|
| 3254 |
-
'@types/node':
|
| 3255 |
form-data: 4.0.2
|
| 3256 |
|
| 3257 |
'@types/[email protected]':
|
| 3258 |
dependencies:
|
| 3259 |
undici-types: 5.26.5
|
| 3260 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3261 |
'@types/[email protected]': {}
|
| 3262 |
|
| 3263 |
'@typescript-eslint/[email protected](@typescript-eslint/[email protected]([email protected]([email protected]))([email protected]))([email protected]([email protected]))([email protected])':
|
|
@@ -3524,6 +3544,8 @@ snapshots:
|
|
| 3524 |
|
| 3525 | |
| 3526 |
|
|
|
|
|
|
|
| 3527 | |
| 3528 |
|
| 3529 | |
|
@@ -4336,7 +4358,7 @@ snapshots:
|
|
| 4336 |
'@protobufjs/path': 1.1.2
|
| 4337 |
'@protobufjs/pool': 1.1.0
|
| 4338 |
'@protobufjs/utf8': 1.1.0
|
| 4339 |
-
'@types/node':
|
| 4340 |
long: 5.3.1
|
| 4341 |
|
| 4342 | |
|
@@ -4641,6 +4663,8 @@ snapshots:
|
|
| 4641 |
|
| 4642 | |
| 4643 |
|
|
|
|
|
|
|
| 4644 | |
| 4645 |
dependencies:
|
| 4646 |
'@antfu/install-pkg': 1.0.0
|
|
@@ -4669,31 +4693,31 @@ snapshots:
|
|
| 4669 |
|
| 4670 | |
| 4671 |
|
| 4672 |
-
[email protected](@types/node@
|
| 4673 |
dependencies:
|
| 4674 |
esbuild: 0.21.5
|
| 4675 |
postcss: 8.5.3
|
| 4676 |
rollup: 4.34.9
|
| 4677 |
optionalDependencies:
|
| 4678 |
-
'@types/node':
|
| 4679 |
fsevents: 2.3.3
|
| 4680 |
lightningcss: 1.29.1
|
| 4681 |
|
| 4682 |
-
[email protected](@types/node@
|
| 4683 |
dependencies:
|
| 4684 |
esbuild: 0.25.1
|
| 4685 |
postcss: 8.5.3
|
| 4686 |
rollup: 4.34.9
|
| 4687 |
optionalDependencies:
|
| 4688 |
-
'@types/node':
|
| 4689 |
fsevents: 2.3.3
|
| 4690 |
jiti: 2.4.2
|
| 4691 |
lightningcss: 1.29.1
|
| 4692 |
yaml: 2.7.0
|
| 4693 |
|
| 4694 |
-
[email protected]([email protected](@types/node@
|
| 4695 |
optionalDependencies:
|
| 4696 |
-
vite: 5.4.14(@types/node@
|
| 4697 |
|
| 4698 | |
| 4699 |
dependencies:
|
|
|
|
| 44 |
version: 1.2.15
|
| 45 |
'@ryoppippi/unplugin-typia':
|
| 46 |
specifier: ^1.0.0
|
| 47 |
+
version: 1.2.0(@samchon/[email protected])(@types/node@22.14.1)([email protected])([email protected])([email protected])([email protected])
|
| 48 |
'@samchon/openapi':
|
| 49 |
specifier: ^3.0.0
|
| 50 |
version: 3.0.0
|
| 51 |
'@sveltejs/adapter-auto':
|
| 52 |
specifier: ^3.2.2
|
| 53 |
+
version: 3.3.1(@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected])))
|
| 54 |
'@sveltejs/adapter-node':
|
| 55 |
specifier: ^5.2.0
|
| 56 |
+
version: 5.2.12(@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected])))
|
| 57 |
'@sveltejs/kit':
|
| 58 |
specifier: ^2.5.27
|
| 59 |
+
version: 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected]))
|
| 60 |
'@sveltejs/vite-plugin-svelte':
|
| 61 |
specifier: ^4.0.0
|
| 62 |
+
version: 4.0.4([email protected])([email protected](@types/node@22.14.1)([email protected]))
|
| 63 |
'@tailwindcss/container-queries':
|
| 64 |
specifier: ^0.1.1
|
| 65 |
version: 0.1.1([email protected])
|
| 66 |
'@tailwindcss/postcss':
|
| 67 |
specifier: ^4.0.9
|
| 68 |
version: 4.0.9
|
| 69 |
+
'@types/node':
|
| 70 |
+
specifier: ^22.14.1
|
| 71 |
+
version: 22.14.1
|
| 72 |
clsx:
|
| 73 |
specifier: ^2.1.1
|
| 74 |
version: 2.1.1
|
| 75 |
+
dotenv:
|
| 76 |
+
specifier: ^16.5.0
|
| 77 |
+
version: 16.5.0
|
| 78 |
eslint:
|
| 79 |
specifier: ^9.22.0
|
| 80 |
version: 9.22.0([email protected])
|
|
|
|
| 143 |
version: 22.1.0([email protected])
|
| 144 |
vite:
|
| 145 |
specifier: ^5.4.4
|
| 146 |
+
version: 5.4.14(@types/node@22.14.1)([email protected])
|
| 147 |
|
| 148 |
packages:
|
| 149 |
|
|
|
|
| 1007 |
'@types/[email protected]':
|
| 1008 |
resolution: {integrity: sha512-ACYy2HGcZPHxEeWTqowTF7dhXN+JU1o7Gr4b41klnn6pj2LD6rsiGqSZojMdk1Jh2ys3m76ap+ae1vvE4+5+vg==}
|
| 1009 |
|
| 1010 |
+
'@types/node@22.14.1':
|
| 1011 |
+
resolution: {integrity: sha512-u0HuPQwe/dHrItgHHpmw3N2fYCR6x4ivMNbPHRkBVP4CvN+kiRrKHWk3i8tXiO/joPwXLMYvF9TTF0eqgHIuOw==}
|
| 1012 |
+
|
| 1013 |
'@types/[email protected]':
|
| 1014 |
resolution: {integrity: sha512-60BCwRFOZCQhDncwQdxxeOEEkbc5dIMccYLwbxsS4TUNeVECQ/pBJ0j09mrHOl/JJvpRPGwO9SvE4nR2Nb/a4Q==}
|
| 1015 |
|
|
|
|
| 1293 |
resolution: {integrity: sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==}
|
| 1294 |
engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0}
|
| 1295 |
|
| 1296 | |
| 1297 |
+
resolution: {integrity: sha512-m/C+AwOAr9/W1UOIZUo232ejMNnJAJtYQjUbHoNTBNTJSvqzzDh7vnrei3o3r3m9blf6ZoDkvcw0VmozNRFJxg==}
|
| 1298 |
+
engines: {node: '>=12'}
|
| 1299 |
+
|
| 1300 | |
| 1301 |
resolution: {integrity: sha512-pYxfDYpued//QpnLIm4Avk7rsNtAtQkUES2cwAYSvD/wd2pKD71gN2Ebj3e7klzXwjocvE8c5vx/1fxwpqmSxA==}
|
| 1302 |
engines: {node: '>=4'}
|
|
|
|
| 2417 | |
| 2418 |
resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==}
|
| 2419 |
|
| 2420 | |
| 2421 |
+
resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==}
|
| 2422 |
+
|
| 2423 | |
| 2424 |
resolution: {integrity: sha512-ect2ZNtk1Zgwb0NVHd0C1IDW/MV+Jk/xaq4t8o6rYdVS3+L660ZdD5kTSQZvsgdwCvquRw+/wYn75hsweRjoIA==}
|
| 2425 |
peerDependencies:
|
|
|
|
| 3103 |
'@rollup/[email protected]':
|
| 3104 |
optional: true
|
| 3105 |
|
| 3106 |
+
'@ryoppippi/[email protected](@samchon/[email protected])(@types/node@22.14.1)([email protected])([email protected])([email protected])([email protected])':
|
| 3107 |
dependencies:
|
| 3108 |
'@rollup/pluginutils': 5.1.4([email protected])
|
| 3109 |
consola: 3.4.0
|
|
|
|
| 3117 |
typescript: 5.6.3
|
| 3118 |
typia: 7.6.4(@samchon/[email protected])([email protected])
|
| 3119 |
unplugin: 1.16.1
|
| 3120 |
+
vite: 6.2.1(@types/node@22.14.1)([email protected])([email protected])([email protected])
|
| 3121 |
transitivePeerDependencies:
|
| 3122 |
- '@samchon/openapi'
|
| 3123 |
- '@types/node'
|
|
|
|
| 3141 |
dependencies:
|
| 3142 |
acorn: 8.14.0
|
| 3143 |
|
| 3144 |
+
'@sveltejs/[email protected](@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected])))':
|
| 3145 |
dependencies:
|
| 3146 |
+
'@sveltejs/kit': 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected]))
|
| 3147 |
import-meta-resolve: 4.1.0
|
| 3148 |
|
| 3149 |
+
'@sveltejs/[email protected](@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected])))':
|
| 3150 |
dependencies:
|
| 3151 |
'@rollup/plugin-commonjs': 28.0.2([email protected])
|
| 3152 |
'@rollup/plugin-json': 6.1.0([email protected])
|
| 3153 |
'@rollup/plugin-node-resolve': 16.0.0([email protected])
|
| 3154 |
+
'@sveltejs/kit': 2.18.0(@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected]))
|
| 3155 |
rollup: 4.34.9
|
| 3156 |
|
| 3157 |
+
'@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected]))':
|
| 3158 |
dependencies:
|
| 3159 |
+
'@sveltejs/vite-plugin-svelte': 4.0.4([email protected])([email protected](@types/node@22.14.1)([email protected]))
|
| 3160 |
'@types/cookie': 0.6.0
|
| 3161 |
cookie: 0.6.0
|
| 3162 |
devalue: 5.1.1
|
|
|
|
| 3169 |
set-cookie-parser: 2.7.1
|
| 3170 |
sirv: 3.0.1
|
| 3171 |
svelte: 5.28.2
|
| 3172 |
+
vite: 5.4.14(@types/node@22.14.1)([email protected])
|
| 3173 |
|
| 3174 |
+
'@sveltejs/[email protected](@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected]))':
|
| 3175 |
dependencies:
|
| 3176 |
+
'@sveltejs/vite-plugin-svelte': 4.0.4([email protected])([email protected](@types/node@22.14.1)([email protected]))
|
| 3177 |
debug: 4.4.0
|
| 3178 |
svelte: 5.28.2
|
| 3179 |
+
vite: 5.4.14(@types/node@22.14.1)([email protected])
|
| 3180 |
transitivePeerDependencies:
|
| 3181 |
- supports-color
|
| 3182 |
|
| 3183 |
+
'@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected]))':
|
| 3184 |
dependencies:
|
| 3185 |
+
'@sveltejs/vite-plugin-svelte-inspector': 3.0.1(@sveltejs/[email protected]([email protected])([email protected](@types/node@22.14.1)([email protected])))([email protected])([email protected](@types/node@22.14.1)([email protected]))
|
| 3186 |
debug: 4.4.0
|
| 3187 |
deepmerge: 4.3.1
|
| 3188 |
kleur: 4.1.5
|
| 3189 |
magic-string: 0.30.17
|
| 3190 |
svelte: 5.28.2
|
| 3191 |
+
vite: 5.4.14(@types/node@22.14.1)([email protected])
|
| 3192 |
+
vitefu: 1.0.6([email protected](@types/node@22.14.1)([email protected]))
|
| 3193 |
transitivePeerDependencies:
|
| 3194 |
- supports-color
|
| 3195 |
|
|
|
|
| 3267 |
|
| 3268 |
'@types/[email protected]':
|
| 3269 |
dependencies:
|
| 3270 |
+
'@types/node': 22.14.1
|
| 3271 |
form-data: 4.0.2
|
| 3272 |
|
| 3273 |
'@types/[email protected]':
|
| 3274 |
dependencies:
|
| 3275 |
undici-types: 5.26.5
|
| 3276 |
|
| 3277 |
+
'@types/node@22.14.1':
|
| 3278 |
+
dependencies:
|
| 3279 |
+
undici-types: 6.21.0
|
| 3280 |
+
|
| 3281 |
'@types/[email protected]': {}
|
| 3282 |
|
| 3283 |
'@typescript-eslint/[email protected](@typescript-eslint/[email protected]([email protected]([email protected]))([email protected]))([email protected]([email protected]))([email protected])':
|
|
|
|
| 3544 |
|
| 3545 | |
| 3546 |
|
| 3547 |
+
dotenv@16.5.0: {}
|
| 3548 |
+
|
| 3549 | |
| 3550 |
|
| 3551 | |
|
|
|
| 4358 |
'@protobufjs/path': 1.1.2
|
| 4359 |
'@protobufjs/pool': 1.1.0
|
| 4360 |
'@protobufjs/utf8': 1.1.0
|
| 4361 |
+
'@types/node': 22.14.1
|
| 4362 |
long: 5.3.1
|
| 4363 |
|
| 4364 | |
|
|
|
| 4663 |
|
| 4664 | |
| 4665 |
|
| 4666 |
+
undici-types@6.21.0: {}
|
| 4667 |
+
|
| 4668 | |
| 4669 |
dependencies:
|
| 4670 |
'@antfu/install-pkg': 1.0.0
|
|
|
|
| 4693 |
|
| 4694 | |
| 4695 |
|
| 4696 |
+
[email protected](@types/node@22.14.1)([email protected]):
|
| 4697 |
dependencies:
|
| 4698 |
esbuild: 0.21.5
|
| 4699 |
postcss: 8.5.3
|
| 4700 |
rollup: 4.34.9
|
| 4701 |
optionalDependencies:
|
| 4702 |
+
'@types/node': 22.14.1
|
| 4703 |
fsevents: 2.3.3
|
| 4704 |
lightningcss: 1.29.1
|
| 4705 |
|
| 4706 |
+
[email protected](@types/node@22.14.1)([email protected])([email protected])([email protected]):
|
| 4707 |
dependencies:
|
| 4708 |
esbuild: 0.25.1
|
| 4709 |
postcss: 8.5.3
|
| 4710 |
rollup: 4.34.9
|
| 4711 |
optionalDependencies:
|
| 4712 |
+
'@types/node': 22.14.1
|
| 4713 |
fsevents: 2.3.3
|
| 4714 |
jiti: 2.4.2
|
| 4715 |
lightningcss: 1.29.1
|
| 4716 |
yaml: 2.7.0
|
| 4717 |
|
| 4718 |
+
[email protected]([email protected](@types/node@22.14.1)([email protected])):
|
| 4719 |
optionalDependencies:
|
| 4720 |
+
vite: 5.4.14(@types/node@22.14.1)([email protected])
|
| 4721 |
|
| 4722 | |
| 4723 |
dependencies:
|
scripts/update-ctx-length.ts
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import dotenv from "dotenv";
|
| 2 |
+
dotenv.config(); // Load .env file into process.env
|
| 3 |
+
|
| 4 |
+
import { fetchAllProviderData, type ApiKeys } from "../src/lib/server/providers/index.js"; // Import ApiKeys type
|
| 5 |
+
import fs from "fs/promises";
|
| 6 |
+
import path from "path";
|
| 7 |
+
|
| 8 |
+
const CACHE_FILE_PATH = path.resolve("src/lib/data/context_length.json");
|
| 9 |
+
|
| 10 |
+
async function runUpdate() {
|
| 11 |
+
console.log("Starting context length cache update...");
|
| 12 |
+
|
| 13 |
+
// Gather API keys from process.env
|
| 14 |
+
const apiKeys: ApiKeys = {
|
| 15 |
+
COHERE_API_KEY: process.env.COHERE_API_KEY,
|
| 16 |
+
TOGETHER_API_KEY: process.env.TOGETHER_API_KEY,
|
| 17 |
+
FIREWORKS_API_KEY: process.env.FIREWORKS_API_KEY,
|
| 18 |
+
HYPERBOLIC_API_KEY: process.env.HYPERBOLIC_API_KEY,
|
| 19 |
+
REPLICATE_API_KEY: process.env.REPLICATE_API_KEY,
|
| 20 |
+
NEBIUS_API_KEY: process.env.NEBIUS_API_KEY,
|
| 21 |
+
NOVITA_API_KEY: process.env.NOVITA_API_KEY,
|
| 22 |
+
SAMBANOVA_API_KEY: process.env.SAMBANOVA_API_KEY,
|
| 23 |
+
};
|
| 24 |
+
|
| 25 |
+
try {
|
| 26 |
+
// Fetch data from all supported providers concurrently, passing keys
|
| 27 |
+
const fetchedData = await fetchAllProviderData(apiKeys);
|
| 28 |
+
|
| 29 |
+
// Read existing manual/cached data
|
| 30 |
+
let existingData = {};
|
| 31 |
+
try {
|
| 32 |
+
const currentCache = await fs.readFile(CACHE_FILE_PATH, "utf-8");
|
| 33 |
+
existingData = JSON.parse(currentCache);
|
| 34 |
+
} catch {
|
| 35 |
+
// Remove unused variable name
|
| 36 |
+
console.log("No existing cache file found or error reading, creating new one.");
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
// Merge fetched data with existing data (fetched data takes precedence)
|
| 40 |
+
const combinedData = { ...existingData, ...fetchedData };
|
| 41 |
+
|
| 42 |
+
// Write the combined data back to the file
|
| 43 |
+
const tempFilePath = CACHE_FILE_PATH + ".tmp";
|
| 44 |
+
await fs.writeFile(tempFilePath, JSON.stringify(combinedData, null, "\t"), "utf-8");
|
| 45 |
+
await fs.rename(tempFilePath, CACHE_FILE_PATH);
|
| 46 |
+
|
| 47 |
+
console.log("Context length cache update complete.");
|
| 48 |
+
console.log(`Cache file written to: ${CACHE_FILE_PATH}`);
|
| 49 |
+
} catch (error) {
|
| 50 |
+
console.error("Error during context length cache update:", error);
|
| 51 |
+
process.exit(1); // Exit with error code
|
| 52 |
+
}
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
runUpdate();
|
src/app.css
CHANGED
|
@@ -67,6 +67,18 @@
|
|
| 67 |
@apply flex h-[39px] items-center justify-center gap-2 rounded-lg border border-gray-200 bg-white px-3 py-2.5 text-sm font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:ring-4 focus:ring-gray-100 focus:outline-hidden dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700;
|
| 68 |
}
|
| 69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
@utility custom-outline {
|
| 71 |
@apply outline-hidden;
|
| 72 |
@apply border-blue-500 ring ring-blue-500;
|
|
|
|
| 67 |
@apply flex h-[39px] items-center justify-center gap-2 rounded-lg border border-gray-200 bg-white px-3 py-2.5 text-sm font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:ring-4 focus:ring-gray-100 focus:outline-hidden dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700;
|
| 68 |
}
|
| 69 |
|
| 70 |
+
@utility btn-sm {
|
| 71 |
+
@apply flex h-[32px] items-center justify-center gap-1.5 rounded-md border border-gray-200 bg-white px-2.5 py-2 text-xs font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:ring-4 focus:ring-gray-100 focus:outline-hidden dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
@utility btn-xs {
|
| 75 |
+
@apply flex h-[28px] items-center justify-center gap-1 rounded border border-gray-200 bg-white px-2 py-1.5 text-xs font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:ring-4 focus:ring-gray-100 focus:outline-hidden dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700;
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
@utility btn-mini {
|
| 79 |
+
@apply flex h-[24px] items-center justify-center gap-0.5 rounded-sm border border-gray-200 bg-white px-1.5 py-1 text-[10px] font-medium text-gray-900 hover:bg-gray-100 hover:text-blue-700 focus:ring-2 focus:ring-gray-100 focus:outline-hidden dark:border-gray-600 dark:bg-gray-800 dark:text-gray-400 dark:hover:bg-gray-700 dark:hover:text-white dark:focus:ring-gray-700;
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
@utility custom-outline {
|
| 83 |
@apply outline-hidden;
|
| 84 |
@apply border-blue-500 ring ring-blue-500;
|
src/lib/components/inference-playground/generation-config-settings.ts
CHANGED
|
@@ -7,7 +7,7 @@ export type GenerationConfigKey = (typeof GENERATION_CONFIG_KEYS)[number];
|
|
| 7 |
export type GenerationConfig = Pick<ChatCompletionInput, GenerationConfigKey>;
|
| 8 |
|
| 9 |
interface GenerationKeySettings {
|
| 10 |
-
default
|
| 11 |
step: number;
|
| 12 |
min: number;
|
| 13 |
max: number;
|
|
@@ -23,7 +23,6 @@ export const GENERATION_CONFIG_SETTINGS: Record<GenerationConfigKey, GenerationK
|
|
| 23 |
label: "Temperature",
|
| 24 |
},
|
| 25 |
max_tokens: {
|
| 26 |
-
default: 2048,
|
| 27 |
step: 256,
|
| 28 |
min: 0,
|
| 29 |
max: 8192, // changed dynamically based on model
|
|
|
|
| 7 |
export type GenerationConfig = Pick<ChatCompletionInput, GenerationConfigKey>;
|
| 8 |
|
| 9 |
interface GenerationKeySettings {
|
| 10 |
+
default?: number;
|
| 11 |
step: number;
|
| 12 |
min: number;
|
| 13 |
max: number;
|
|
|
|
| 23 |
label: "Temperature",
|
| 24 |
},
|
| 25 |
max_tokens: {
|
|
|
|
| 26 |
step: 256,
|
| 27 |
min: 0,
|
| 28 |
max: 8192, // changed dynamically based on model
|
src/lib/components/inference-playground/generation-config.svelte
CHANGED
|
@@ -1,8 +1,10 @@
|
|
| 1 |
<script lang="ts">
|
| 2 |
-
import type
|
| 3 |
-
|
| 4 |
import { GENERATION_CONFIG_KEYS, GENERATION_CONFIG_SETTINGS } from "./generation-config-settings.js";
|
| 5 |
-
import {
|
|
|
|
|
|
|
| 6 |
|
| 7 |
interface Props {
|
| 8 |
conversation: Conversation;
|
|
@@ -11,37 +13,58 @@
|
|
| 11 |
|
| 12 |
let { conversation = $bindable(), classNames = "" }: Props = $props();
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
</script>
|
| 17 |
|
| 18 |
<div class="flex flex-col gap-y-7 {classNames}">
|
| 19 |
{#each GENERATION_CONFIG_KEYS as key}
|
| 20 |
{@const { label, min, step } = GENERATION_CONFIG_SETTINGS[key]}
|
| 21 |
-
{@const
|
|
|
|
|
|
|
| 22 |
<div>
|
| 23 |
<div class="flex items-center justify-between">
|
| 24 |
-
<label for=
|
| 25 |
-
|
| 26 |
-
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
<input
|
| 28 |
-
|
| 29 |
-
|
| 30 |
{min}
|
| 31 |
{max}
|
| 32 |
{step}
|
| 33 |
bind:value={conversation.config[key]}
|
|
|
|
| 34 |
/>
|
| 35 |
-
|
| 36 |
-
<input
|
| 37 |
-
id="temperature-range"
|
| 38 |
-
type="range"
|
| 39 |
-
{min}
|
| 40 |
-
{max}
|
| 41 |
-
{step}
|
| 42 |
-
bind:value={conversation.config[key]}
|
| 43 |
-
class="h-2 w-full cursor-pointer appearance-none rounded-lg bg-gray-200 accent-black dark:bg-gray-700 dark:accent-blue-500"
|
| 44 |
-
/>
|
| 45 |
</div>
|
| 46 |
{/each}
|
| 47 |
|
|
|
|
| 1 |
<script lang="ts">
|
| 2 |
+
import { type Conversation } from "$lib/types.js";
|
| 3 |
+
import { watch } from "runed";
|
| 4 |
import { GENERATION_CONFIG_KEYS, GENERATION_CONFIG_SETTINGS } from "./generation-config-settings.js";
|
| 5 |
+
import { maxAllowedTokens } from "./utils.js";
|
| 6 |
+
import { isNumber } from "$lib/utils/is.js";
|
| 7 |
+
import IconX from "~icons/carbon/close";
|
| 8 |
|
| 9 |
interface Props {
|
| 10 |
conversation: Conversation;
|
|
|
|
| 13 |
|
| 14 |
let { conversation = $bindable(), classNames = "" }: Props = $props();
|
| 15 |
|
| 16 |
+
const maxTokens = $derived(maxAllowedTokens(conversation));
|
| 17 |
+
|
| 18 |
+
watch(
|
| 19 |
+
() => maxTokens,
|
| 20 |
+
() => {
|
| 21 |
+
const curr = conversation.config.max_tokens;
|
| 22 |
+
if (!curr || curr <= maxTokens) return;
|
| 23 |
+
conversation.config.max_tokens = maxTokens;
|
| 24 |
+
}
|
| 25 |
+
);
|
| 26 |
</script>
|
| 27 |
|
| 28 |
<div class="flex flex-col gap-y-7 {classNames}">
|
| 29 |
{#each GENERATION_CONFIG_KEYS as key}
|
| 30 |
{@const { label, min, step } = GENERATION_CONFIG_SETTINGS[key]}
|
| 31 |
+
{@const isMaxTokens = key === "max_tokens"}
|
| 32 |
+
{@const max = isMaxTokens ? maxTokens : GENERATION_CONFIG_SETTINGS[key].max}
|
| 33 |
+
|
| 34 |
<div>
|
| 35 |
<div class="flex items-center justify-between">
|
| 36 |
+
<label for={key} class="mb-2 block text-sm font-medium text-gray-900 dark:text-white">
|
| 37 |
+
{label}
|
| 38 |
+
</label>
|
| 39 |
+
<div class="flex items-center gap-2">
|
| 40 |
+
{#if !isMaxTokens || isNumber(conversation.config[key])}
|
| 41 |
+
<input
|
| 42 |
+
type="number"
|
| 43 |
+
class="w-20 rounded-sm border bg-transparent px-1 py-0.5 text-right text-sm dark:border-gray-700"
|
| 44 |
+
{min}
|
| 45 |
+
{max}
|
| 46 |
+
{step}
|
| 47 |
+
bind:value={conversation.config[key]}
|
| 48 |
+
/>
|
| 49 |
+
{/if}
|
| 50 |
+
{#if isMaxTokens && isNumber(conversation.config[key])}
|
| 51 |
+
<button class="btn-mini" onclick={() => (conversation.config[key] = undefined)}> <IconX /> </button>
|
| 52 |
+
{:else if isMaxTokens}
|
| 53 |
+
<button class="btn-mini" onclick={() => (conversation.config[key] = maxTokens / 2)}> set </button>
|
| 54 |
+
{/if}
|
| 55 |
+
</div>
|
| 56 |
+
</div>
|
| 57 |
+
{#if !isMaxTokens || isNumber(conversation.config[key])}
|
| 58 |
<input
|
| 59 |
+
id={key}
|
| 60 |
+
type="range"
|
| 61 |
{min}
|
| 62 |
{max}
|
| 63 |
{step}
|
| 64 |
bind:value={conversation.config[key]}
|
| 65 |
+
class="h-2 w-full cursor-pointer appearance-none rounded-lg bg-gray-200 accent-black dark:bg-gray-700 dark:accent-blue-500"
|
| 66 |
/>
|
| 67 |
+
{/if}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
</div>
|
| 69 |
{/each}
|
| 70 |
|
src/lib/components/inference-playground/playground.svelte
CHANGED
|
@@ -6,7 +6,6 @@
|
|
| 6 |
import { isConversationWithHFModel, type ConversationMessage, type Model, type Project } from "$lib/types.js";
|
| 7 |
import { cmdOrCtrl, optOrAlt } from "$lib/utils/platform.js";
|
| 8 |
import { Popover } from "melt/components";
|
| 9 |
-
import { watch } from "runed";
|
| 10 |
import typia from "typia";
|
| 11 |
import { default as IconDelete } from "~icons/carbon/trash-can";
|
| 12 |
import { showShareModal } from "../share-modal.svelte";
|
|
@@ -19,7 +18,7 @@
|
|
| 19 |
import ModelSelectorModal from "./model-selector-modal.svelte";
|
| 20 |
import ModelSelector from "./model-selector.svelte";
|
| 21 |
import ProjectSelect from "./project-select.svelte";
|
| 22 |
-
import {
|
| 23 |
|
| 24 |
import { iterate } from "$lib/utils/array.js";
|
| 25 |
import IconChatLeft from "~icons/carbon/align-box-bottom-left";
|
|
@@ -43,19 +42,6 @@
|
|
| 43 |
|
| 44 |
let selectCompareModelOpen = $state(false);
|
| 45 |
|
| 46 |
-
watch(
|
| 47 |
-
() => $state.snapshot(session.project),
|
| 48 |
-
() => {
|
| 49 |
-
session.project.conversations.forEach(async (c, i) => {
|
| 50 |
-
session.generationStats[i] = {
|
| 51 |
-
latency: 0,
|
| 52 |
-
...session.generationStats[i],
|
| 53 |
-
generatedTokensCount: await getTokens(c),
|
| 54 |
-
};
|
| 55 |
-
});
|
| 56 |
-
}
|
| 57 |
-
);
|
| 58 |
-
|
| 59 |
const systemPromptSupported = $derived(
|
| 60 |
session.project.conversations.some(conversation => isSystemPromptSupported(conversation.model))
|
| 61 |
);
|
|
|
|
| 6 |
import { isConversationWithHFModel, type ConversationMessage, type Model, type Project } from "$lib/types.js";
|
| 7 |
import { cmdOrCtrl, optOrAlt } from "$lib/utils/platform.js";
|
| 8 |
import { Popover } from "melt/components";
|
|
|
|
| 9 |
import typia from "typia";
|
| 10 |
import { default as IconDelete } from "~icons/carbon/trash-can";
|
| 11 |
import { showShareModal } from "../share-modal.svelte";
|
|
|
|
| 18 |
import ModelSelectorModal from "./model-selector-modal.svelte";
|
| 19 |
import ModelSelector from "./model-selector.svelte";
|
| 20 |
import ProjectSelect from "./project-select.svelte";
|
| 21 |
+
import { isSystemPromptSupported } from "./utils.js";
|
| 22 |
|
| 23 |
import { iterate } from "$lib/utils/array.js";
|
| 24 |
import IconChatLeft from "~icons/carbon/align-box-bottom-left";
|
|
|
|
| 42 |
|
| 43 |
let selectCompareModelOpen = $state(false);
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
const systemPromptSupported = $derived(
|
| 46 |
session.project.conversations.some(conversation => isSystemPromptSupported(conversation.model))
|
| 47 |
);
|
src/lib/components/inference-playground/utils.ts
CHANGED
|
@@ -1,15 +1,18 @@
|
|
| 1 |
-
import
|
|
|
|
| 2 |
import {
|
| 3 |
isCustomModel,
|
|
|
|
| 4 |
type Conversation,
|
| 5 |
type ConversationMessage,
|
| 6 |
type CustomModel,
|
| 7 |
type Model,
|
| 8 |
} from "$lib/types.js";
|
|
|
|
|
|
|
| 9 |
import type { ChatCompletionInputMessage, InferenceSnippet } from "@huggingface/tasks";
|
| 10 |
import { type ChatCompletionOutputMessage } from "@huggingface/tasks";
|
| 11 |
-
import {
|
| 12 |
-
import { HfInference, snippets, type InferenceProvider } from "@huggingface/inference";
|
| 13 |
import OpenAI from "openai";
|
| 14 |
|
| 15 |
type ChatCompletionInputMessageChunk =
|
|
@@ -48,6 +51,24 @@ type OpenAICompletionMetadata = {
|
|
| 48 |
|
| 49 |
type CompletionMetadata = HFCompletionMetadata | OpenAICompletionMetadata;
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
function getCompletionMetadata(conversation: Conversation, signal?: AbortSignal): CompletionMetadata {
|
| 52 |
const { model, systemMessage } = conversation;
|
| 53 |
|
|
@@ -88,6 +109,7 @@ function getCompletionMetadata(conversation: Conversation, signal?: AbortSignal)
|
|
| 88 |
messages: messages.map(parseMessage),
|
| 89 |
provider: conversation.provider,
|
| 90 |
...conversation.config,
|
|
|
|
| 91 |
},
|
| 92 |
};
|
| 93 |
}
|
|
@@ -284,11 +306,20 @@ export async function getTokenizer(model: Model) {
|
|
| 284 |
}
|
| 285 |
}
|
| 286 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
export async function getTokens(conversation: Conversation): Promise<number> {
|
| 288 |
const model = conversation.model;
|
| 289 |
-
if (isCustomModel(model)) return
|
| 290 |
const tokenizer = await getTokenizer(model);
|
| 291 |
-
if (tokenizer === null) return
|
| 292 |
|
| 293 |
// This is a simplified version - you might need to adjust based on your exact needs
|
| 294 |
let formattedText = "";
|
|
|
|
| 1 |
+
import ctxLengthData from "$lib/data/context_length.json";
|
| 2 |
+
import { token } from "$lib/state/token.svelte";
|
| 3 |
import {
|
| 4 |
isCustomModel,
|
| 5 |
+
isHFModel,
|
| 6 |
type Conversation,
|
| 7 |
type ConversationMessage,
|
| 8 |
type CustomModel,
|
| 9 |
type Model,
|
| 10 |
} from "$lib/types.js";
|
| 11 |
+
import { tryGet } from "$lib/utils/object.js";
|
| 12 |
+
import { HfInference, snippets, type InferenceProvider } from "@huggingface/inference";
|
| 13 |
import type { ChatCompletionInputMessage, InferenceSnippet } from "@huggingface/tasks";
|
| 14 |
import { type ChatCompletionOutputMessage } from "@huggingface/tasks";
|
| 15 |
+
import { AutoTokenizer, PreTrainedTokenizer } from "@huggingface/transformers";
|
|
|
|
| 16 |
import OpenAI from "openai";
|
| 17 |
|
| 18 |
type ChatCompletionInputMessageChunk =
|
|
|
|
| 51 |
|
| 52 |
type CompletionMetadata = HFCompletionMetadata | OpenAICompletionMetadata;
|
| 53 |
|
| 54 |
+
/**
 * Returns the upper bound to use for `max_tokens` for a conversation.
 *
 * The context length is looked up in the bundled table imported from
 * `$lib/data/context_length.json`, keyed first by provider and then by the
 * model's id on that provider. When no usable entry exists (no provider
 * selected, the model is not an HF model, the model has no mapping for the
 * provider, or the table value is missing / not a positive number), the
 * per-model override in `customMaxTokens` is used, and finally 100000.
 *
 * @param conversation Conversation whose `provider` and `model` are inspected.
 * @returns The maximum number of tokens allowed for the conversation.
 */
export function maxAllowedTokens(conversation: Conversation): number {
	const FALLBACK_MAX_TOKENS = 100000;
	const { provider, model } = conversation;

	// Kept as `unknown` until validated: the table is plain JSON data.
	let ctxLength: unknown;
	if (provider && isHFModel(model)) {
		const idOnProvider = model.inferenceProviderMapping.find(data => data.provider === provider)?.providerId;
		if (idOnProvider) {
			const models = tryGet(ctxLengthData, provider);
			if (models) ctxLength = tryGet(models, idOnProvider);
		}
	}

	// A value of 0 (present in the table for some models) counts as "unknown".
	if (typeof ctxLength === "number" && ctxLength > 0) return ctxLength;
	return customMaxTokens[conversation.model.id] ?? FALLBACK_MAX_TOKENS;
}
|
| 71 |
+
|
| 72 |
function getCompletionMetadata(conversation: Conversation, signal?: AbortSignal): CompletionMetadata {
|
| 73 |
const { model, systemMessage } = conversation;
|
| 74 |
|
|
|
|
| 109 |
messages: messages.map(parseMessage),
|
| 110 |
provider: conversation.provider,
|
| 111 |
...conversation.config,
|
| 112 |
+
// max_tokens: maxAllowedTokens(conversation) - currTokens,
|
| 113 |
},
|
| 114 |
};
|
| 115 |
}
|
|
|
|
| 306 |
}
|
| 307 |
}
|
| 308 |
|
| 309 |
+
/**
 * Rough token estimate for when no tokenizer is available.
 *
 * Concatenates the content of every message (missing content counts as empty)
 * and assumes one token is about four characters.
 *
 * @param conversation Conversation whose messages are measured.
 * @returns Estimated token count as a whole number, rounded up.
 */
export function estimateTokens(conversation: Conversation): number {
	const CHARS_PER_TOKEN = 4;
	const text = conversation.messages.map(message => message?.content ?? "").join("");

	// Token counts are whole numbers; round up so any non-empty text costs at least 1.
	return Math.ceil(text.length / CHARS_PER_TOKEN);
}
|
| 317 |
+
|
| 318 |
export async function getTokens(conversation: Conversation): Promise<number> {
|
| 319 |
const model = conversation.model;
|
| 320 |
+
if (isCustomModel(model)) return estimateTokens(conversation);
|
| 321 |
const tokenizer = await getTokenizer(model);
|
| 322 |
+
if (tokenizer === null) return estimateTokens(conversation);
|
| 323 |
|
| 324 |
// This is a simplified version - you might need to adjust based on your exact needs
|
| 325 |
let formattedText = "";
|
src/lib/data/context_length.json
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"replicate": {},
|
| 3 |
+
"sambanova": {
|
| 4 |
+
"DeepSeek-R1": 16384,
|
| 5 |
+
"DeepSeek-R1-Distill-Llama-70B": 131072,
|
| 6 |
+
"DeepSeek-V3-0324": 16384,
|
| 7 |
+
"E5-Mistral-7B-Instruct": 4096,
|
| 8 |
+
"Llama-4-Maverick-17B-128E-Instruct": 8192,
|
| 9 |
+
"Llama-4-Scout-17B-16E-Instruct": 8192,
|
| 10 |
+
"Meta-Llama-3.1-405B-Instruct": 16384,
|
| 11 |
+
"Meta-Llama-3.1-8B-Instruct": 16384,
|
| 12 |
+
"Meta-Llama-3.2-1B-Instruct": 16384,
|
| 13 |
+
"Meta-Llama-3.2-3B-Instruct": 4096,
|
| 14 |
+
"Meta-Llama-3.3-70B-Instruct": 131072,
|
| 15 |
+
"Meta-Llama-Guard-3-8B": 16384,
|
| 16 |
+
"QwQ-32B": 16384,
|
| 17 |
+
"Qwen2-Audio-7B-Instruct": 4096,
|
| 18 |
+
"Qwen3-32B": 8192
|
| 19 |
+
},
|
| 20 |
+
"nebius": {
|
| 21 |
+
"meta-llama/Meta-Llama-3.1-8B-Instruct-fast": 131072,
|
| 22 |
+
"meta-llama/Meta-Llama-3.1-8B-Instruct": 131072,
|
| 23 |
+
"meta-llama/Meta-Llama-3.1-70B-Instruct-fast": 131072,
|
| 24 |
+
"meta-llama/Meta-Llama-3.1-70B-Instruct": 131072,
|
| 25 |
+
"meta-llama/Meta-Llama-3.1-405B-Instruct": 131072,
|
| 26 |
+
"meta-llama/Llama-Guard-3-8B": 131072,
|
| 27 |
+
"nvidia/Llama-3.1-Nemotron-70B-Instruct-HF-fast": 131072,
|
| 28 |
+
"nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": 131072,
|
| 29 |
+
"nvidia/Llama-3_1-Nemotron-Ultra-253B-v1": 131072,
|
| 30 |
+
"mistralai/Mistral-Nemo-Instruct-2407-fast": 128000,
|
| 31 |
+
"mistralai/Mistral-Nemo-Instruct-2407": 128000,
|
| 32 |
+
"mistralai/Mixtral-8x7B-Instruct-v0.1-fast": 32768,
|
| 33 |
+
"mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
|
| 34 |
+
"mistralai/Mixtral-8x22B-Instruct-v0.1-fast": 65536,
|
| 35 |
+
"mistralai/Mixtral-8x22B-Instruct-v0.1": 65536,
|
| 36 |
+
"allenai/OLMo-7B-Instruct-hf": 2048,
|
| 37 |
+
"microsoft/Phi-3-mini-4k-instruct-fast": 4096,
|
| 38 |
+
"microsoft/Phi-3-mini-4k-instruct": 4096,
|
| 39 |
+
"microsoft/Phi-3-medium-128k-instruct-fast": 131072,
|
| 40 |
+
"microsoft/Phi-3-medium-128k-instruct": 131072,
|
| 41 |
+
"google/gemma-2-2b-it-fast": 8192,
|
| 42 |
+
"google/gemma-2-2b-it": 8192,
|
| 43 |
+
"google/gemma-2-9b-it-fast": 8192,
|
| 44 |
+
"google/gemma-2-9b-it": 8192,
|
| 45 |
+
"google/gemma-2-27b-it-fast": 8192,
|
| 46 |
+
"google/gemma-2-27b-it": 8192,
|
| 47 |
+
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct-fast": 128000,
|
| 48 |
+
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 128000,
|
| 49 |
+
"Qwen/Qwen2.5-Coder-7B-fast": 32768,
|
| 50 |
+
"Qwen/Qwen2.5-Coder-7B": 32768,
|
| 51 |
+
"Qwen/Qwen2.5-Coder-7B-Instruct-fast": 32768,
|
| 52 |
+
"Qwen/Qwen2.5-Coder-7B-Instruct": 32768,
|
| 53 |
+
"Qwen/Qwen2.5-Coder-32B-Instruct-fast": 131072,
|
| 54 |
+
"Qwen/Qwen2.5-Coder-32B-Instruct": 131072,
|
| 55 |
+
"Qwen/Qwen2.5-32B-Instruct-fast": 131072,
|
| 56 |
+
"Qwen/Qwen2.5-32B-Instruct": 131072,
|
| 57 |
+
"Qwen/Qwen2.5-72B-Instruct-fast": 131072,
|
| 58 |
+
"Qwen/Qwen2.5-72B-Instruct": 131072,
|
| 59 |
+
"Qwen/Qwen2-VL-72B-Instruct": 32768,
|
| 60 |
+
"Qwen/Qwen2-VL-7B-Instruct": 32768,
|
| 61 |
+
"llava-hf/llava-1.5-7b-hf": 4096,
|
| 62 |
+
"llava-hf/llava-1.5-13b-hf": 4096,
|
| 63 |
+
"aaditya/Llama3-OpenBioLLM-8B": 8192,
|
| 64 |
+
"aaditya/Llama3-OpenBioLLM-70B": 8192,
|
| 65 |
+
"BAAI/bge-en-icl": 32768,
|
| 66 |
+
"BAAI/bge-multilingual-gemma2": 4096,
|
| 67 |
+
"intfloat/e5-mistral-7b-instruct": 32768,
|
| 68 |
+
"cognitivecomputations/dolphin-2.9.2-mixtral-8x22b": 65536,
|
| 69 |
+
"microsoft/Phi-3.5-MoE-instruct": 131072,
|
| 70 |
+
"microsoft/Phi-3.5-mini-instruct": 131072,
|
| 71 |
+
"Qwen/Qwen2.5-1.5B-Instruct": 32768,
|
| 72 |
+
"meta-llama/Llama-3.3-70B-Instruct": 131072,
|
| 73 |
+
"meta-llama/Llama-3.3-70B-Instruct-fast": 131072,
|
| 74 |
+
"meta-llama/Llama-3.2-1B-Instruct": 131072,
|
| 75 |
+
"meta-llama/Llama-3.2-3B-Instruct": 131072,
|
| 76 |
+
"Qwen/QwQ-32B-Preview": 32768,
|
| 77 |
+
"Qwen/QVQ-72B-preview": 128000,
|
| 78 |
+
"microsoft/phi-4": 16384,
|
| 79 |
+
"deepseek-ai/DeepSeek-V3": 163840,
|
| 80 |
+
"deepseek-ai/DeepSeek-R1": 163840,
|
| 81 |
+
"NousResearch/Hermes-3-Llama-405B": 131072,
|
| 82 |
+
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072,
|
| 83 |
+
"deepseek-ai/DeepSeek-R1-fast": 163840,
|
| 84 |
+
"Qwen/QwQ-32B-fast": 131072,
|
| 85 |
+
"Qwen/QwQ-32B": 131072,
|
| 86 |
+
"nvidia/Llama-3_3-Nemotron-Super-49B-v1": 131072,
|
| 87 |
+
"mistralai/Mistral-Small-3.1-24B-Instruct-2503": 131072,
|
| 88 |
+
"google/gemma-3-27b-it": 131072,
|
| 89 |
+
"google/gemma-3-27b-it-fast": 131072,
|
| 90 |
+
"Qwen/Qwen2.5-VL-72B-Instruct": 32000,
|
| 91 |
+
"deepseek-ai/DeepSeek-V3-0324": 163840,
|
| 92 |
+
"deepseek-ai/DeepSeek-V3-0324-fast": 163840,
|
| 93 |
+
"black-forest-labs/flux-dev": 0,
|
| 94 |
+
"black-forest-labs/flux-schnell": 0,
|
| 95 |
+
"stability-ai/sdxl": 0
|
| 96 |
+
},
|
| 97 |
+
"novita": {
|
| 98 |
+
"deepseek/deepseek-prover-v2-671b": 160000,
|
| 99 |
+
"qwen/qwen3-235b-a22b-fp8": 128000,
|
| 100 |
+
"qwen/qwen3-30b-a3b-fp8": 128000,
|
| 101 |
+
"qwen/qwen3-32b-fp8": 128000,
|
| 102 |
+
"deepseek/deepseek-v3-0324": 128000,
|
| 103 |
+
"qwen/qwen2.5-vl-72b-instruct": 96000,
|
| 104 |
+
"deepseek/deepseek-v3-turbo": 64000,
|
| 105 |
+
"deepseek/deepseek-r1-turbo": 64000,
|
| 106 |
+
"meta-llama/llama-4-maverick-17b-128e-instruct-fp8": 1048576,
|
| 107 |
+
"google/gemma-3-27b-it": 32000,
|
| 108 |
+
"qwen/qwq-32b": 32768,
|
| 109 |
+
"Sao10K/L3-8B-Stheno-v3.2": 8192,
|
| 110 |
+
"gryphe/mythomax-l2-13b": 4096,
|
| 111 |
+
"meta-llama/llama-4-scout-17b-16e-instruct": 131072,
|
| 112 |
+
"deepseek/deepseek-r1-distill-llama-8b": 32000,
|
| 113 |
+
"deepseek/deepseek_v3": 64000,
|
| 114 |
+
"meta-llama/llama-3.1-8b-instruct": 16384,
|
| 115 |
+
"deepseek/deepseek-r1-distill-qwen-14b": 64000,
|
| 116 |
+
"meta-llama/llama-3.3-70b-instruct": 131072,
|
| 117 |
+
"qwen/qwen-2.5-72b-instruct": 32000,
|
| 118 |
+
"mistralai/mistral-nemo": 131072,
|
| 119 |
+
"deepseek/deepseek-r1-distill-qwen-32b": 64000,
|
| 120 |
+
"meta-llama/llama-3-8b-instruct": 8192,
|
| 121 |
+
"microsoft/wizardlm-2-8x22b": 65535,
|
| 122 |
+
"deepseek/deepseek-r1-distill-llama-70b": 32000,
|
| 123 |
+
"meta-llama/llama-3.1-70b-instruct": 32768,
|
| 124 |
+
"google/gemma-2-9b-it": 8192,
|
| 125 |
+
"mistralai/mistral-7b-instruct": 32768,
|
| 126 |
+
"meta-llama/llama-3-70b-instruct": 8192,
|
| 127 |
+
"deepseek/deepseek-r1": 64000,
|
| 128 |
+
"nousresearch/hermes-2-pro-llama-3-8b": 8192,
|
| 129 |
+
"sao10k/l3-70b-euryale-v2.1": 8192,
|
| 130 |
+
"cognitivecomputations/dolphin-mixtral-8x22b": 16000,
|
| 131 |
+
"jondurbin/airoboros-l2-70b": 4096,
|
| 132 |
+
"sophosympatheia/midnight-rose-70b": 4096,
|
| 133 |
+
"sao10k/l3-8b-lunaris": 8192,
|
| 134 |
+
"qwen/qwen3-0.6b-fp8": 32000,
|
| 135 |
+
"qwen/qwen3-1.7b-fp8": 32000,
|
| 136 |
+
"qwen/qwen3-8b-fp8": 128000,
|
| 137 |
+
"qwen/qwen3-4b-fp8": 128000,
|
| 138 |
+
"qwen/qwen3-14b-fp8": 128000,
|
| 139 |
+
"thudm/glm-4-9b-0414": 32000,
|
| 140 |
+
"thudm/glm-z1-9b-0414": 32000,
|
| 141 |
+
"thudm/glm-z1-32b-0414": 32000,
|
| 142 |
+
"thudm/glm-4-32b-0414": 32000,
|
| 143 |
+
"thudm/glm-z1-rumination-32b-0414": 32000,
|
| 144 |
+
"qwen/qwen2.5-7b-instruct": 32000,
|
| 145 |
+
"meta-llama/llama-3.2-1b-instruct": 131000,
|
| 146 |
+
"meta-llama/llama-3.2-11b-vision-instruct": 32768,
|
| 147 |
+
"meta-llama/llama-3.2-3b-instruct": 32768,
|
| 148 |
+
"meta-llama/llama-3.1-8b-instruct-bf16": 8192,
|
| 149 |
+
"sao10k/l31-70b-euryale-v2.2": 8192
|
| 150 |
+
},
|
| 151 |
+
"fal": {
|
| 152 |
+
"fal/model-name": 4096
|
| 153 |
+
},
|
| 154 |
+
"cerebras": {
|
| 155 |
+
"cerebras/model-name": 8192
|
| 156 |
+
},
|
| 157 |
+
"hf-inference": {
|
| 158 |
+
"google/gemma-2-9b-it": 8192,
|
| 159 |
+
"meta-llama/Meta-Llama-3-8B-Instruct": 8192
|
| 160 |
+
},
|
| 161 |
+
"hyperbolic": {
|
| 162 |
+
"Qwen/Qwen2.5-72B-Instruct": 131072,
|
| 163 |
+
"Qwen/Qwen2.5-VL-72B-Instruct": 32768,
|
| 164 |
+
"meta-llama/Meta-Llama-3-70B-Instruct": 8192,
|
| 165 |
+
"deepseek-ai/DeepSeek-V3": 131072,
|
| 166 |
+
"deepseek-ai/DeepSeek-V3-0324": 163840,
|
| 167 |
+
"meta-llama/Llama-3.3-70B-Instruct": 131072,
|
| 168 |
+
"Qwen/QwQ-32B-Preview": 32768,
|
| 169 |
+
"Qwen/Qwen2.5-Coder-32B-Instruct": 32768,
|
| 170 |
+
"meta-llama/Llama-3.2-3B-Instruct": 131072,
|
| 171 |
+
"NousResearch/Hermes-3-Llama-3.1-70B": 12288,
|
| 172 |
+
"meta-llama/Meta-Llama-3.1-405B-Instruct": 131000,
|
| 173 |
+
"meta-llama/Meta-Llama-3.1-70B-Instruct": 131072,
|
| 174 |
+
"meta-llama/Meta-Llama-3.1-8B-Instruct": 131072,
|
| 175 |
+
"mistralai/Pixtral-12B-2409": 32768,
|
| 176 |
+
"Qwen/Qwen2.5-VL-7B-Instruct": 32768,
|
| 177 |
+
"meta-llama/Meta-Llama-3.1-405B": 32768,
|
| 178 |
+
"meta-llama/Meta-Llama-3.1-405B-FP8": 32768,
|
| 179 |
+
"deepseek-ai/DeepSeek-R1": 163840,
|
| 180 |
+
"Qwen/QwQ-32B": 131072
|
| 181 |
+
},
|
| 182 |
+
"cohere": {
|
| 183 |
+
"embed-english-light-v3.0": 512,
|
| 184 |
+
"embed-multilingual-v2.0": 256,
|
| 185 |
+
"rerank-v3.5": 4096,
|
| 186 |
+
"embed-v4.0": 8192,
|
| 187 |
+
"rerank-english-v3.0": 4096,
|
| 188 |
+
"command-r": 128000,
|
| 189 |
+
"embed-english-light-v3.0-image": 0,
|
| 190 |
+
"embed-english-v3.0-image": 0,
|
| 191 |
+
"command-a-03-2025": 288000,
|
| 192 |
+
"command-nightly": 288000,
|
| 193 |
+
"command-r7b-12-2024": 128000,
|
| 194 |
+
"command-r-plus": 128000,
|
| 195 |
+
"c4ai-aya-vision-32b": 16384,
|
| 196 |
+
"command-r7b-arabic-02-2025": 128000,
|
| 197 |
+
"command-light-nightly": 4096,
|
| 198 |
+
"embed-english-v3.0": 512,
|
| 199 |
+
"embed-multilingual-light-v3.0-image": 0,
|
| 200 |
+
"embed-multilingual-v3.0-image": 0,
|
| 201 |
+
"c4ai-aya-expanse-32b": 128000,
|
| 202 |
+
"command": 4096,
|
| 203 |
+
"c4ai-aya-vision-8b": 16384
|
| 204 |
+
},
|
| 205 |
+
"together": {
|
| 206 |
+
"meta-llama/Llama-3.3-70B-Instruct-Turbo": 131072,
|
| 207 |
+
"togethercomputer/m2-bert-80M-32k-retrieval": 32768,
|
| 208 |
+
"google/gemma-2-9b-it": 8192,
|
| 209 |
+
"cartesia/sonic": 0,
|
| 210 |
+
"Qwen/Qwen2.5-7B-Instruct-Turbo": 32768,
|
| 211 |
+
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free": 8192,
|
| 212 |
+
"meta-llama-llama-2-70b-hf": 4096,
|
| 213 |
+
"BAAI/bge-base-en-v1.5": 512,
|
| 214 |
+
"Gryphe/MythoMax-L2-13b": 4096,
|
| 215 |
+
"google/gemma-2-27b-it": 8192,
|
| 216 |
+
"Qwen/Qwen2-VL-72B-Instruct": 32768,
|
| 217 |
+
"meta-llama/LlamaGuard-2-8b": 8192,
|
| 218 |
+
"cartesia/sonic-2": 0,
|
| 219 |
+
"togethercomputer/m2-bert-80M-8k-retrieval": 8192,
|
| 220 |
+
"meta-llama/Llama-3.3-70B-Instruct-Turbo-Free": 131072,
|
| 221 |
+
"arcee-ai/maestro-reasoning": 131072,
|
| 222 |
+
"Qwen/QwQ-32B": 131072,
|
| 223 |
+
"togethercomputer/MoA-1": 32768,
|
| 224 |
+
"mistralai/Mistral-7B-Instruct-v0.2": 32768,
|
| 225 |
+
"meta-llama/Meta-Llama-3-70B-Instruct-Turbo": 8192,
|
| 226 |
+
"google/gemma-2b-it": 8192,
|
| 227 |
+
"mistralai/Mistral-Small-24B-Instruct-2501": 32768,
|
| 228 |
+
"Gryphe/MythoMax-L2-13b-Lite": 4096,
|
| 229 |
+
"meta-llama/Meta-Llama-Guard-3-8B": 8192,
|
| 230 |
+
"scb10x/scb10x-llama3-1-typhoon2-8b-instruct": 8192,
|
| 231 |
+
"Qwen/Qwen3-235B-A22B-fp8-tput": 40960,
|
| 232 |
+
"meta-llama/Llama-3-8b-chat-hf": 8192,
|
| 233 |
+
"arcee-ai/caller": 32768,
|
| 234 |
+
"togethercomputer/MoA-1-Turbo": 32768,
|
| 235 |
+
"mistralai/Mistral-7B-Instruct-v0.1": 32768,
|
| 236 |
+
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
|
| 237 |
+
"scb10x/scb10x-llama3-1-typhoon2-70b-instruct": 8192,
|
| 238 |
+
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": 131072,
|
| 239 |
+
"meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo": 131072,
|
| 240 |
+
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072,
|
| 241 |
+
"arcee-ai/virtuoso-medium-v2": 131072,
|
| 242 |
+
"arcee-ai/coder-large": 32768,
|
| 243 |
+
"arcee-ai/virtuoso-large": 131072,
|
| 244 |
+
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": 131072,
|
| 245 |
+
"meta-llama/Meta-Llama-3-8B-Instruct-Lite": 8192,
|
| 246 |
+
"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo": 131072,
|
| 247 |
+
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": 1048576,
|
| 248 |
+
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": 131072,
|
| 249 |
+
"mistralai/Mixtral-8x7B-v0.1": 32768,
|
| 250 |
+
"meta-llama/Llama-4-Scout-17B-16E-Instruct": 1048576,
|
| 251 |
+
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": 130815,
|
| 252 |
+
"deepseek-ai/DeepSeek-R1": 163840,
|
| 253 |
+
"arcee-ai/arcee-blitz": 32768,
|
| 254 |
+
"deepseek-ai/DeepSeek-V3-p-dp": 131072,
|
| 255 |
+
"deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": 131072,
|
| 256 |
+
"deepseek-ai/DeepSeek-V3": 131072,
|
| 257 |
+
"Qwen/Qwen2.5-Coder-32B-Instruct": 16384,
|
| 258 |
+
"Qwen/Qwen2-72B-Instruct": 32768,
|
| 259 |
+
"mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
|
| 260 |
+
"meta-llama/Llama-3-70b-chat-hf": 8192,
|
| 261 |
+
"mistralai/Mistral-7B-Instruct-v0.3": 32768,
|
| 262 |
+
"Salesforce/Llama-Rank-V1": 8192,
|
| 263 |
+
"nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": 32768,
|
| 264 |
+
"meta-llama/Llama-Vision-Free": 131072,
|
| 265 |
+
"meta-llama/Llama-Guard-3-11B-Vision-Turbo": 131072,
|
| 266 |
+
"meta-llama/Llama-3.2-3B-Instruct-Turbo": 131072,
|
| 267 |
+
"Qwen/Qwen2.5-72B-Instruct-Turbo": 131072,
|
| 268 |
+
"arcee_ai/arcee-spotlight": 131072,
|
| 269 |
+
"meta-llama/Llama-2-70b-hf": 4096,
|
| 270 |
+
"Qwen/Qwen2.5-VL-72B-Instruct": 32768
|
| 271 |
+
},
|
| 272 |
+
"fireworks-ai": {
|
| 273 |
+
"accounts/fireworks/models/qwq-32b": 131072,
|
| 274 |
+
"accounts/fireworks/models/qwen2-vl-72b-instruct": 32768,
|
| 275 |
+
"accounts/fireworks/models/deepseek-v3": 131072,
|
| 276 |
+
"accounts/fireworks/models/llama-v3p1-8b-instruct": 131072,
|
| 277 |
+
"accounts/fireworks/models/llama-v3p1-70b-instruct": 131072,
|
| 278 |
+
"accounts/fireworks/models/llama-v3p2-90b-vision-instruct": 131072,
|
| 279 |
+
"accounts/fireworks/models/llama-v3-70b-instruct": 8192,
|
| 280 |
+
"accounts/fireworks/models/deepseek-v3-0324": 163840,
|
| 281 |
+
"accounts/fireworks/models/qwen2p5-vl-32b-instruct": 128000,
|
| 282 |
+
"accounts/fireworks/models/llama4-maverick-instruct-basic": 1048576,
|
| 283 |
+
"accounts/fireworks/models/qwen3-30b-a3b": 131072,
|
| 284 |
+
"accounts/fireworks/models/llama4-scout-instruct-basic": 1048576,
|
| 285 |
+
"accounts/fireworks/models/deepseek-r1-basic": 163840,
|
| 286 |
+
"accounts/fireworks/models/qwen-qwq-32b-preview": 32768,
|
| 287 |
+
"accounts/fireworks/models/phi-3-vision-128k-instruct": 32064,
|
| 288 |
+
"accounts/fireworks/models/firesearch-ocr-v6": 131072,
|
| 289 |
+
"accounts/fireworks/models/llama-v3p3-70b-instruct": 131072,
|
| 290 |
+
"accounts/fireworks/models/deepseek-r1": 163840,
|
| 291 |
+
"accounts/yi-01-ai/models/yi-large": 32768,
|
| 292 |
+
"accounts/fireworks/models/llama-v3p1-405b-instruct": 131072,
|
| 293 |
+
"accounts/fireworks/models/llama-guard-3-8b": 131072,
|
| 294 |
+
"accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new": 131072,
|
| 295 |
+
"accounts/fireworks/models/mixtral-8x22b-instruct": 65536,
|
| 296 |
+
"accounts/fireworks/models/qwen2p5-72b-instruct": 32768,
|
| 297 |
+
"accounts/perplexity/models/r1-1776": 163840
|
| 298 |
+
}
|
| 299 |
+
}
|
src/lib/server/providers/cohere.ts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { MaxTokensCache } from "./index.js";
|
| 2 |
+
|
| 3 |
+
const COHERE_API_URL = "https://api.cohere.ai/v1/models";

/**
 * Fetches the context length of the models listed by the Cohere models endpoint.
 *
 * The response is expected to look like `{ models: [{ name, context_length }, ...] }`.
 * Entries that are not objects, have no name, or have a non-numeric
 * `context_length` are skipped individually, so one malformed entry does not
 * discard the rest.
 *
 * @param apiKey Cohere API key. When missing, the request is skipped.
 * @returns Map of model name to context length. Empty when no key is given or
 *          when the request fails; failures are logged and never thrown.
 */
export async function fetchCohereData(apiKey: string | undefined): Promise<MaxTokensCache["cohere"]> {
	if (!apiKey) {
		console.warn("Cohere API key not provided. Skipping Cohere fetch.");
		return {};
	}

	try {
		const response = await fetch(COHERE_API_URL, {
			headers: { Authorization: `Bearer ${apiKey}` },
		});
		if (!response.ok) {
			throw new Error(`Cohere API request failed: ${response.status} ${response.statusText}`);
		}

		// Untrusted JSON: keep it `unknown` and narrow before reading anything.
		const payload: unknown = await response.json();
		const models: unknown =
			typeof payload === "object" && payload !== null ? (payload as { models?: unknown }).models : undefined;

		const modelsData: MaxTokensCache["cohere"] = {};
		if (Array.isArray(models)) {
			for (const entry of models as unknown[]) {
				if (typeof entry !== "object" || entry === null) continue;
				const { name, context_length: contextLength } = entry as { name?: unknown; context_length?: unknown };
				if (typeof name === "string" && name !== "" && typeof contextLength === "number") {
					modelsData[name] = contextLength;
				}
			}
		}
		return modelsData;
	} catch (error) {
		console.error("Error fetching Cohere data:", error);
		return {};
	}
}
|
src/lib/server/providers/fireworks.ts
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { MaxTokensCache } from "./index.js";
|
| 2 |
+
|
| 3 |
+
const FIREWORKS_API_URL = "https://api.fireworks.ai/inference/v1/models"; // Assumed

/**
 * Fetches the context length of the models listed by the Fireworks AI models endpoint.
 *
 * The response is assumed to follow the OpenAI list shape `{ data: [{ id, ... }] }`.
 * For each model the first present field among `context_length`,
 * `context_window` and `config.max_tokens` is used. Entries that are not
 * objects, have no id, or have a non-numeric length are skipped individually.
 *
 * @param apiKey Fireworks AI API key. When missing, the request is skipped.
 * @returns Map of model id to context length. Empty when no key is given, when
 *          the response has an unexpected structure, or when the request fails;
 *          failures are logged and never thrown.
 */
export async function fetchFireworksData(apiKey: string | undefined): Promise<MaxTokensCache["fireworks-ai"]> {
	if (!apiKey) {
		console.warn("Fireworks AI API key not provided. Skipping Fireworks AI fetch.");
		return {};
	}

	try {
		const response = await fetch(FIREWORKS_API_URL, {
			headers: { Authorization: `Bearer ${apiKey}` },
		});
		if (!response.ok) {
			throw new Error(`Fireworks AI API request failed: ${response.status} ${response.statusText}`);
		}

		// Untrusted JSON: keep it `unknown` and narrow before reading anything.
		const payload: unknown = await response.json();
		const list: unknown =
			typeof payload === "object" && payload !== null ? (payload as { data?: unknown }).data : undefined;

		const modelsData: MaxTokensCache["fireworks-ai"] = {};
		if (!Array.isArray(list)) {
			console.warn("Unexpected response structure from Fireworks AI API:", payload);
			return modelsData;
		}

		for (const entry of list as unknown[]) {
			if (typeof entry !== "object" || entry === null) continue;
			const model = entry as {
				id?: unknown;
				context_length?: unknown;
				context_window?: unknown;
				config?: { max_tokens?: unknown } | null;
			};
			// Check for common context length fields (OpenAI uses context_window)
			const contextLength = model.context_length ?? model.context_window ?? model.config?.max_tokens ?? null;
			// Fireworks uses model.id
			if (typeof model.id === "string" && model.id !== "" && typeof contextLength === "number") {
				modelsData[model.id] = contextLength;
			}
		}
		return modelsData;
	} catch (error) {
		console.error("Error fetching Fireworks AI data:", error);
		return {}; // Return empty on error
	}
}
|
src/lib/server/providers/hyperbolic.ts
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { MaxTokensCache } from "./index.js";
|
| 2 |
+
|
| 3 |
+
const HYPERBOLIC_API_URL = "https://api.hyperbolic.xyz/v1/models"; // Assumed

/**
 * Fetches the context length of the models listed by the Hyperbolic models endpoint.
 *
 * The response is assumed to follow the OpenAI list shape `{ data: [{ id, ... }] }`.
 * For each model the first present field among `context_length`,
 * `context_window` and `config.max_tokens` is used. Entries that are not
 * objects, have no id, or have a non-numeric length are skipped individually.
 *
 * @param apiKey Hyperbolic API key. When missing, the request is skipped.
 * @returns Map of model id to context length. Empty when no key is given, when
 *          the response has an unexpected structure, or when the request fails;
 *          failures are logged and never thrown.
 */
export async function fetchHyperbolicData(apiKey: string | undefined): Promise<MaxTokensCache["hyperbolic"]> {
	if (!apiKey) {
		console.warn("Hyperbolic API key not provided. Skipping Hyperbolic fetch.");
		return {};
	}

	try {
		const response = await fetch(HYPERBOLIC_API_URL, {
			headers: { Authorization: `Bearer ${apiKey}` },
		});
		if (!response.ok) {
			throw new Error(`Hyperbolic API request failed: ${response.status} ${response.statusText}`);
		}

		// Untrusted JSON: keep it `unknown` and narrow before reading anything.
		const payload: unknown = await response.json();
		const list: unknown =
			typeof payload === "object" && payload !== null ? (payload as { data?: unknown }).data : undefined;

		const modelsData: MaxTokensCache["hyperbolic"] = {};
		if (!Array.isArray(list)) {
			console.warn("Unexpected response structure from Hyperbolic API:", payload);
			return modelsData;
		}

		for (const entry of list as unknown[]) {
			if (typeof entry !== "object" || entry === null) continue;
			const model = entry as {
				id?: unknown;
				context_length?: unknown;
				context_window?: unknown;
				config?: { max_tokens?: unknown } | null;
			};
			// Check for common context length fields (OpenAI uses context_window)
			const contextLength = model.context_length ?? model.context_window ?? model.config?.max_tokens ?? null;
			// Assuming Hyperbolic uses model.id
			if (typeof model.id === "string" && model.id !== "" && typeof contextLength === "number") {
				modelsData[model.id] = contextLength;
			}
		}
		return modelsData;
	} catch (error) {
		console.error("Error fetching Hyperbolic data:", error);
		return {}; // Return empty on error
	}
}
|
src/lib/server/providers/index.ts
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import fs from "fs/promises";
|
| 2 |
+
import path from "path";
|
| 3 |
+
import { fetchCohereData } from "./cohere.js";
|
| 4 |
+
import { fetchTogetherData } from "./together.js";
|
| 5 |
+
import { fetchFireworksData } from "./fireworks.js";
|
| 6 |
+
import { fetchHyperbolicData } from "./hyperbolic.js";
|
| 7 |
+
import { fetchReplicateData } from "./replicate.js";
|
| 8 |
+
import { fetchNebiusData } from "./nebius.js";
|
| 9 |
+
import { fetchNovitaData } from "./novita.js";
|
| 10 |
+
import { fetchSambanovaData } from "./sambanova.js";
|
| 11 |
+
|
| 12 |
+
// --- Constants ---
|
| 13 |
+
// Location of the JSON cache read by readCache and rewritten by updateCache.
// The relative path is resolved against the process working directory.
// NOTE(review): the bundled data file appears to live at src/lib/data/context_length.json —
// confirm that this src/lib/server/data path is the intended one.
const CACHE_FILE_PATH = path.resolve("src/lib/server/data/context_length.json");
|
| 14 |
+
|
| 15 |
+
// --- Types ---
|
| 16 |
+
/**
 * Two-level lookup table: provider name, then model id, then a token count
 * (the context length reported by that provider for that model).
 */
export interface MaxTokensCache {
	[provider: string]: Record<string, number>;
}
|
| 21 |
+
|
| 22 |
+
/**
 * API keys handed to fetchAllProviderData, one optional entry per provider.
 * A provider whose key is absent is skipped by its fetcher.
 */
export interface ApiKeys {
	COHERE_API_KEY?: string;
	FIREWORKS_API_KEY?: string;
	HYPERBOLIC_API_KEY?: string;
	NEBIUS_API_KEY?: string;
	NOVITA_API_KEY?: string;
	REPLICATE_API_KEY?: string;
	SAMBANOVA_API_KEY?: string;
	TOGETHER_API_KEY?: string;
}
|
| 33 |
+
|
| 34 |
+
// --- Cache Handling ---
|
| 35 |
+
// readCache serves lookups from memory or from the cache file; updateCache persists newly fetched values.
|
| 36 |
+
/** Parsed cache contents once a read has completed; reset to null by a failed update. */
let memoryCache: MaxTokensCache | null = null;
/** Read currently in flight, shared so that concurrent callers trigger a single file read. */
let cacheReadPromise: Promise<MaxTokensCache> | null = null;

/**
 * Returns the context-length cache, loading it from CACHE_FILE_PATH on first use.
 *
 * A missing or unreadable file is logged and treated as an empty cache, so the
 * returned promise does not reject because of a read or parse failure.
 */
async function readCache(): Promise<MaxTokensCache> {
	if (memoryCache) return memoryCache;
	if (cacheReadPromise) return cacheReadPromise;

	const loadFromDisk = async (): Promise<MaxTokensCache> => {
		try {
			const raw = await fs.readFile(CACHE_FILE_PATH, "utf-8");
			const parsed = JSON.parse(raw) as MaxTokensCache;
			memoryCache = parsed;
			return parsed;
		} catch (error: unknown) {
			const fileIsMissing =
				typeof error === "object" && error !== null && "code" in error && error.code === "ENOENT";
			if (fileIsMissing) {
				console.warn(`Cache file not found at ${CACHE_FILE_PATH}, starting with empty cache.`);
			} else {
				console.error("Error reading context length cache file:", error);
			}
			// Either way, remember an empty cache so the file is not re-read on every lookup.
			memoryCache = {};
			return {};
		} finally {
			// The read has settled; later callers go through memoryCache instead.
			cacheReadPromise = null;
		}
	};

	cacheReadPromise = loadFromDisk();
	return cacheReadPromise;
}
|
| 66 |
+
|
| 67 |
+
/** True when a global `window` exists, i.e. the module is being evaluated in a browser. */
const isBrowser = typeof window !== "undefined";

/** Forwards its arguments to console.log, but only outside the browser. */
function serverLog(...txt: unknown[]) {
	if (!isBrowser) {
		console.log(...txt);
	}
}

/** Forwards its arguments to console.error, but only outside the browser. */
function serverError(...txt: unknown[]) {
	if (!isBrowser) {
		console.error(...txt);
	}
}
|
| 78 |
+
|
| 79 |
+
/** Tail of the chain of pending cache writes; each update starts after the previous one settles. */
let cacheWriteQueue: Promise<void> = Promise.resolve();

/**
 * Records one provider/model context length in the cache file and in memory.
 *
 * Updates are queued and executed one at a time: callers invoke this without
 * awaiting, and overlapping read-modify-write cycles would share the same temp
 * file and could drop each other's entries.
 *
 * Failures are logged rather than thrown; after a failure the in-memory cache
 * is cleared so that the next readCache call goes back to the file.
 *
 * @param provider - Provider name used as the first-level cache key.
 * @param modelId - Model id used as the second-level cache key.
 * @param maxTokens - Value to store for that provider/model pair.
 */
async function updateCache(provider: string, modelId: string, maxTokens: number): Promise<void> {
	// writeCacheEntry never rejects, so one failed write cannot stall the queue.
	const run = cacheWriteQueue.then(() => writeCacheEntry(provider, modelId, maxTokens));
	cacheWriteQueue = run;
	return run;
}

/** Performs a single read-modify-write of the cache file; errors are logged, not thrown. */
async function writeCacheEntry(provider: string, modelId: string, maxTokens: number): Promise<void> {
	try {
		let cache: MaxTokensCache;
		try {
			const data = await fs.readFile(CACHE_FILE_PATH, "utf-8");
			cache = JSON.parse(data) as MaxTokensCache;
		} catch (readError: unknown) {
			const fileIsMissing =
				typeof readError === "object" && readError !== null && "code" in readError && readError.code === "ENOENT";
			if (!fileIsMissing) {
				throw readError;
			}
			// No cache file yet: start from an empty cache.
			cache = {};
		}

		const providerEntry = cache[provider] ?? {};
		providerEntry[modelId] = maxTokens;
		cache[provider] = providerEntry;

		// The first write may happen before the cache directory exists.
		await fs.mkdir(path.dirname(CACHE_FILE_PATH), { recursive: true });

		// Write to a temp file and rename it over the target so readers never see a partial file.
		const tempFilePath = CACHE_FILE_PATH + ".tmp";
		await fs.writeFile(tempFilePath, JSON.stringify(cache, null, "\t"), "utf-8");
		await fs.rename(tempFilePath, CACHE_FILE_PATH);

		memoryCache = cache;
		serverLog(`Cache updated for ${provider} - ${modelId}: ${maxTokens}`);
	} catch (error) {
		serverError(`Error updating context length cache for ${provider} - ${modelId}:`, error);
		memoryCache = null;
	}
}
|
| 106 |
+
|
| 107 |
+
// --- Main Exported Function ---
|
| 108 |
+
/** Signature shared by every provider fetcher used for live lookups. */
type ProviderFetcher = (apiKey: string | undefined) => Promise<MaxTokensCache[string]>;

/** Providers that support a live fetch, keyed by the provider name used in the cache. */
const LIVE_FETCHERS = new Map<string, ProviderFetcher>([
	["cohere", fetchCohereData],
	["together", fetchTogetherData],
	["fireworks-ai", fetchFireworksData],
	["hyperbolic", fetchHyperbolicData],
	["replicate", fetchReplicateData],
	["nebius", fetchNebiusData],
	["novita", fetchNovitaData],
	["sambanova", fetchSambanovaData],
]);

/**
 * Looks up the context length of a model, first in the cache and then via a
 * live request to the provider.
 *
 * A value obtained live is written back to the cache in the background; the
 * caller does not wait for that write.
 *
 * @param provider - Provider name (e.g. "together", "fireworks-ai").
 * @param modelId - Model id as the provider reports it.
 * @param apiKey - Key forwarded to the provider fetcher; fetchers skip the request without one.
 * @returns The context length, or null when the provider is unsupported, the
 * model is not in the provider's data, or the live fetch throws.
 */
export async function getMaxTokens(
	provider: string,
	modelId: string,
	apiKey: string | undefined
): Promise<number | null> {
	const cached = (await readCache())[provider]?.[modelId];
	if (cached !== undefined) {
		return cached;
	}

	serverLog(`Cache miss for ${provider} - ${modelId}. Attempting live fetch...`);

	const fetchProvider = LIVE_FETCHERS.get(provider);
	if (!fetchProvider) {
		serverLog(`Live fetch not supported or implemented for provider: ${provider}`);
		return null;
	}

	try {
		const providerData: MaxTokensCache[string] | null = await fetchProvider(apiKey);
		const liveValue = providerData?.[modelId] ?? null;

		if (liveValue === null) {
			serverLog(`Live fetch for ${provider} did not return data for model ${modelId}.`);
			return null;
		}

		serverLog(`Live fetch successful for ${provider} - ${modelId}: ${liveValue}`);
		// Fire and forget: persisting the value must not delay the response.
		updateCache(provider, modelId, liveValue).catch(err => {
			serverError(`Async cache update failed for ${provider} - ${modelId}:`, err);
		});
		return liveValue;
	} catch (error) {
		serverError(`Error during live fetch for ${provider} - ${modelId}:`, error);
		return null;
	}
}
|
| 181 |
+
|
| 182 |
+
// --- Helper for Build Script ---
|
| 183 |
+
/**
 * Queries every known provider in parallel and collects the context lengths
 * each one returns.
 *
 * Providers that fail or return nothing are logged and left out of the result.
 *
 * @param apiKeys - Per-provider API keys; each fetcher receives its own key.
 * @returns Map of provider name to that provider's model/context-length map,
 * containing only providers that returned at least one entry.
 */
export async function fetchAllProviderData(apiKeys: ApiKeys): Promise<MaxTokensCache> {
	serverLog("Fetching data for all providers...");

	// Each fetcher closes over the key that belongs to its provider.
	const providerFetchers = [
		{ name: "cohere", fetcher: () => fetchCohereData(apiKeys.COHERE_API_KEY) },
		{ name: "together", fetcher: () => fetchTogetherData(apiKeys.TOGETHER_API_KEY) },
		{ name: "fireworks-ai", fetcher: () => fetchFireworksData(apiKeys.FIREWORKS_API_KEY) },
		{ name: "hyperbolic", fetcher: () => fetchHyperbolicData(apiKeys.HYPERBOLIC_API_KEY) },
		{ name: "replicate", fetcher: () => fetchReplicateData(apiKeys.REPLICATE_API_KEY) },
		{ name: "nebius", fetcher: () => fetchNebiusData(apiKeys.NEBIUS_API_KEY) },
		{ name: "novita", fetcher: () => fetchNovitaData(apiKeys.NOVITA_API_KEY) },
		{ name: "sambanova", fetcher: () => fetchSambanovaData(apiKeys.SAMBANOVA_API_KEY) },
	];

	const settled = await Promise.allSettled(providerFetchers.map(({ fetcher }) => fetcher()));

	const collected: MaxTokensCache = {};
	for (const [index, outcome] of settled.entries()) {
		const info = providerFetchers[index];
		if (!info) {
			serverError(`Error: No provider info found for index ${index}`);
			continue;
		}

		if (outcome.status === "rejected") {
			serverError(`Error fetching ${info.name} data:`, outcome.reason);
			continue;
		}
		if (!outcome.value) {
			continue;
		}

		if (Object.keys(outcome.value).length > 0) {
			collected[info.name] = outcome.value;
			serverLog(`Successfully fetched data for ${info.name}`);
		} else {
			serverLog(`No data returned for ${info.name}.`);
		}
	}

	serverLog("Finished fetching provider data.");
	return collected;
}
|
src/lib/server/providers/nebius.ts
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { MaxTokensCache } from "./index.js";
|
| 2 |
+
|
| 3 |
+
/** The fields of a model entry that fetchNebiusData reads. */
interface NebiusModel {
	id: string;
	/** Preferred source of the context length when present. */
	context_length?: number;
	/** Fallback source: `config.max_tokens` is used when `context_length` is absent. */
	config?: { max_tokens?: number };
}

/** Envelope expected from the models endpoint: the entries sit under `data`. */
interface NebiusResponse {
	data?: NebiusModel[];
}
|
| 14 |
+
|
| 15 |
+
// Model-list endpoint requested by fetchNebiusData (note the verbose=true query flag).
const NEBIUS_API_URL = "https://api.studio.nebius.com/v1/models?verbose=true";
|
| 16 |
+
|
| 17 |
+
/**
 * Requests the Nebius model list and maps each model id to its context length.
 *
 * Never rejects: a missing key, a failed request or an unexpected payload all
 * resolve to an empty object (after logging).
 *
 * @param apiKey - Bearer token sent to the API; the request is skipped when it is falsy.
 * @returns Map of model id to context length for every entry that reports a numeric value.
 */
export async function fetchNebiusData(apiKey: string | undefined): Promise<MaxTokensCache["nebius"]> {
	if (!apiKey) {
		console.warn("Nebius API key not provided. Skipping Nebius fetch.");
		return {};
	}

	try {
		const res = await fetch(NEBIUS_API_URL, { headers: { Authorization: `Bearer ${apiKey}` } });
		if (!res.ok) {
			throw new Error(`Nebius API request failed: ${res.status} ${res.statusText}`);
		}

		const payload: NebiusResponse = await res.json();
		const contextLengths: MaxTokensCache["nebius"] = {};

		const models = payload?.data;
		if (!models || !Array.isArray(models)) {
			console.warn("Unexpected response structure from Nebius API:", payload);
			return contextLengths;
		}

		for (const entry of models) {
			// context_length takes precedence; config.max_tokens is the fallback.
			const limit = entry.context_length ?? entry.config?.max_tokens ?? null;
			if (!entry.id || typeof limit !== "number") continue;
			contextLengths[entry.id] = limit;
		}
		return contextLengths;
	} catch (error) {
		console.error("Error fetching Nebius data:", error);
		return {};
	}
}
|
src/lib/server/providers/novita.ts
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { MaxTokensCache } from "./index.js";
|
| 2 |
+
|
| 3 |
+
// Model-list endpoint requested by fetchNovitaData.
const NOVITA_API_URL = "https://api.novita.ai/v3/openai/models";
|
| 4 |
+
|
| 5 |
+
/** Shape of a model entry as fetchNovitaData expects it. */
interface NovitaModel {
	id: string;
	object: string;
	/** Stored by fetchNovitaData as the model's context length. */
	context_size: number;
}

/** Envelope expected from the models endpoint: the entries sit under `data`. */
interface NovitaResponse {
	data: NovitaModel[];
}
|
| 14 |
+
|
| 15 |
+
/**
 * Requests the Novita model list and maps each model id to its `context_size`.
 *
 * Never rejects: a missing key, a failed request or an unexpected payload all
 * resolve to an empty object (after logging).
 *
 * @param apiKey - Bearer token sent to the API; the request is skipped when it is falsy.
 * @returns Map of model id to context size for every entry that reports a numeric value.
 */
export async function fetchNovitaData(apiKey: string | undefined): Promise<MaxTokensCache["novita"]> {
	if (!apiKey) {
		console.warn("Novita API key not provided. Skipping Novita fetch.");
		return {};
	}

	try {
		const res = await fetch(NOVITA_API_URL, { headers: { Authorization: `Bearer ${apiKey}` } });
		if (!res.ok) {
			throw new Error(`Novita API request failed: ${res.status} ${res.statusText}`);
		}

		const payload: NovitaResponse = await res.json();
		const contextSizes: MaxTokensCache["novita"] = {};

		const models = payload?.data;
		if (!models || !Array.isArray(models)) {
			console.warn("Unexpected response structure from Novita API:", payload);
			return contextSizes;
		}

		models.forEach(entry => {
			// Skip entries without an id or with a non-numeric size.
			if (entry.id && typeof entry.context_size === "number") {
				contextSizes[entry.id] = entry.context_size;
			}
		});
		return contextSizes;
	} catch (error) {
		console.error("Error fetching Novita data:", error);
		return {};
	}
}
|
src/lib/server/providers/replicate.ts
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { MaxTokensCache } from "./index.js";
|
| 2 |
+
|
| 3 |
+
// Model-list endpoint requested by fetchReplicateData.
const REPLICATE_API_URL = "https://api.replicate.com/v1/models";
|
| 4 |
+
|
| 5 |
+
/**
 * Requests the Replicate model list and maps each model id to its context length.
 *
 * Never rejects: a missing key, a failed request or an unexpected payload all
 * resolve to an empty object (after logging).
 *
 * NOTE(review): this reads `id`, `context_length` and `config.max_tokens` from
 * the entries of `results`; confirm the API actually exposes those fields,
 * otherwise the result is always empty.
 *
 * @param apiKey - Sent as `Authorization: Token <key>`; the request is skipped when it is falsy.
 * @returns Map of model id to context length for every entry that reports a numeric value.
 */
export async function fetchReplicateData(apiKey: string | undefined): Promise<MaxTokensCache["replicate"]> {
	if (!apiKey) {
		console.warn("Replicate API key not provided. Skipping Replicate fetch.");
		return {};
	}

	try {
		const res = await fetch(REPLICATE_API_URL, { headers: { Authorization: `Token ${apiKey}` } });
		if (!res.ok) {
			throw new Error(`Replicate API request failed: ${res.status} ${res.statusText}`);
		}

		const payload = await res.json();
		const contextLengths: MaxTokensCache["replicate"] = {};

		// Unlike the other providers, the entries are expected under `results`.
		const models = payload?.results;
		if (!models || !Array.isArray(models)) {
			console.warn("Unexpected response structure from Replicate API:", payload);
			return contextLengths;
		}

		for (const entry of models) {
			const limit = entry.context_length ?? entry.config?.max_tokens ?? null;
			if (!entry.id || typeof limit !== "number") continue;
			contextLengths[entry.id] = limit;
		}
		return contextLengths;
	} catch (error) {
		console.error("Error fetching Replicate data:", error);
		return {};
	}
}
|
src/lib/server/providers/sambanova.ts
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { MaxTokensCache } from "./index.js";
|
| 2 |
+
|
| 3 |
+
// Model-list endpoint requested by fetchSambanovaData.
const SAMBANOVA_API_URL = "https://api.sambanova.ai/v1/models";
|
| 4 |
+
|
| 5 |
+
/**
 * Shape of a model entry as fetchSambanovaData expects it. Only `id` and
 * `context_length` are read there; the remaining fields are declared but unused.
 */
interface SambanovaModel {
	id: string;
	object: string;
	/** Stored by fetchSambanovaData as the model's context length. */
	context_length: number;
	max_completion_tokens?: number;
	pricing?: { prompt: string; completion: string };
}

/** Envelope expected from the models endpoint: the entries sit under `data`. */
interface SambanovaResponse {
	data: SambanovaModel[];
	object: string;
}
|
| 20 |
+
|
| 21 |
+
/**
 * Requests the SambaNova model list and maps each model id to its `context_length`.
 *
 * Never rejects: a missing key, a failed request or an unexpected payload all
 * resolve to an empty object (after logging).
 *
 * @param apiKey - Bearer token sent to the API; the request is skipped when it is falsy.
 * @returns Map of model id to context length for every entry that reports a numeric value.
 */
export async function fetchSambanovaData(apiKey: string | undefined): Promise<MaxTokensCache["sambanova"]> {
	if (!apiKey) {
		console.warn("SambaNova API key not provided. Skipping SambaNova fetch.");
		return {};
	}

	try {
		const res = await fetch(SAMBANOVA_API_URL, { headers: { Authorization: `Bearer ${apiKey}` } });
		if (!res.ok) {
			throw new Error(`SambaNova API request failed: ${res.status} ${res.statusText}`);
		}

		const payload: SambanovaResponse = await res.json();
		const contextLengths: MaxTokensCache["sambanova"] = {};

		const models = payload?.data;
		if (!models || !Array.isArray(models)) {
			console.warn("Unexpected response structure from SambaNova API:", payload);
			return contextLengths;
		}

		for (const entry of models) {
			// Skip entries without an id or with a non-numeric context length.
			if (!entry.id || typeof entry.context_length !== "number") continue;
			contextLengths[entry.id] = entry.context_length;
		}
		return contextLengths;
	} catch (error) {
		console.error("Error fetching SambaNova data:", error);
		return {};
	}
}
|
src/lib/server/providers/together.ts
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { MaxTokensCache } from "./index.js";
|
| 2 |
+
|
| 3 |
+
// Model-list endpoint requested by fetchTogetherData.
const TOGETHER_API_URL = "https://api.together.xyz/v1/models";
|
| 4 |
+
|
| 5 |
+
/**
 * Requests the Together AI model list and maps each model id to its context length.
 *
 * The response body is expected to be a bare array of model entries. Any other
 * shape is reported with a warning, as the other provider fetchers do, instead
 * of silently producing an empty result.
 *
 * Never rejects: a missing key, a failed request or an unexpected payload all
 * resolve to an empty object (after logging).
 *
 * @param apiKey - Bearer token sent to the API; the request is skipped when it is falsy.
 * @returns Map of model id to context length for every entry that reports a numeric value.
 */
export async function fetchTogetherData(apiKey: string | undefined): Promise<MaxTokensCache["together"]> {
	if (!apiKey) {
		console.warn("Together AI API key not provided. Skipping Together AI fetch.");
		return {};
	}
	try {
		const response = await fetch(TOGETHER_API_URL, {
			headers: {
				Authorization: `Bearer ${apiKey}`,
			},
		});
		if (!response.ok) {
			throw new Error(`Together AI API request failed: ${response.status} ${response.statusText}`);
		}
		// Typed as unknown so the array check below is a real narrowing, not a formality.
		const data: unknown = await response.json();
		const modelsData: MaxTokensCache["together"] = {};

		if (Array.isArray(data)) {
			for (const model of data) {
				// context_length takes precedence; config.max_tokens is the fallback.
				const contextLength = model.context_length ?? model.config?.max_tokens ?? null;
				if (model.id && typeof contextLength === "number") {
					modelsData[model.id] = contextLength;
				}
			}
		} else {
			console.warn("Unexpected response structure from Together AI API:", data);
		}
		return modelsData;
	} catch (error) {
		console.error("Error fetching Together AI data:", error);
		return {};
	}
}
|
src/lib/state/generation-stats.svelte.ts
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { getTokens } from "$lib/components/inference-playground/utils.js";
|
| 2 |
+
import { watch } from "runed";
|
| 3 |
+
import { session } from "./session.svelte";
|
| 4 |
+
|
| 5 |
+
/** Per-conversation generation figures tracked by the generationStats store. */
export interface GenerationStats {
	/** Token count for the conversation, as returned by getTokens. */
	generatedTokensCount: number;
	/** Latency of the generation; initialised to 0 by the store. */
	latency: number;
}
|
| 9 |
+
|
| 10 |
+
/**
 * Builds the generation-stats store: a reactive array with one entry per
 * conversation, extended with `init` and `set` helpers.
 *
 * The returned object IS the state array, so every update mutates it in place;
 * reassigning the local binding would leave the exported object untouched.
 */
function createGenerationStats() {
	const stats = $state([] as Array<GenerationStats>);

	/** Starts watching the current project and refreshes each conversation's token count on change. */
	const init = () => {
		watch(
			() => $state.snapshot(session.project),
			() => {
				session.project.conversations.forEach(async (c, i) => {
					try {
						const generatedTokensCount = await getTokens(c);
						// Read the current entry only after the await, so a latency written while
						// the tokens were being counted is not overwritten by a stale copy.
						stats[i] = { latency: 0, ...stats[i], generatedTokensCount };
					} catch (error) {
						// forEach does not await its callback; handle the failure here so it
						// does not surface as an unhandled rejection.
						console.error("Error counting tokens for conversation:", error);
					}
				});
			}
		);
	};

	/** Replaces the contents of the store with `s`, keeping the same array object. */
	const set = (s: Array<GenerationStats>) => {
		stats.splice(0, stats.length, ...s);
	};

	return Object.assign(stats, { set, init });
}
|
| 30 |
+
|
| 31 |
+
/** Shared generation-stats store, created once at module load; exposes `set` and `init` alongside its entries. */
export const generationStats = createGenerationStats();
|
src/lib/types.ts
CHANGED
|
@@ -27,6 +27,7 @@ export type ConversationWithHFModel = Conversation & {
|
|
| 27 |
export const isConversationWithHFModel = typia.createIs<ConversationWithHFModel>();
|
| 28 |
export const isConversationWithCustomModel = typia.createIs<ConversationWithCustomModel>();
|
| 29 |
|
|
|
|
| 30 |
export const isCustomModel = typia.createIs<CustomModel>();
|
| 31 |
|
| 32 |
export type Project = {
|
|
|
|
| 27 |
export const isConversationWithHFModel = typia.createIs<ConversationWithHFModel>();
|
| 28 |
export const isConversationWithCustomModel = typia.createIs<ConversationWithCustomModel>();
|
| 29 |
|
| 30 |
+
/** Runtime type guard for `Model`, generated by typia. */
export const isHFModel = typia.createIs<Model>();
|
| 31 |
export const isCustomModel = typia.createIs<CustomModel>();
|
| 32 |
|
| 33 |
export type Project = {
|
src/lib/utils/is.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import { SvelteSet } from "svelte/reactivity";
|
|
|
|
| 2 |
|
| 3 |
export function isHtmlElement(element: unknown): element is HTMLElement {
|
| 4 |
return element instanceof HTMLElement;
|
|
@@ -35,3 +36,5 @@ export function isTouch(event: PointerEvent): boolean {
|
|
| 35 |
export function isPromise(value: unknown): value is Promise<unknown> {
|
| 36 |
return value instanceof Promise;
|
| 37 |
}
|
|
|
|
|
|
|
|
|
| 1 |
import { SvelteSet } from "svelte/reactivity";
|
| 2 |
+
import typia from "typia";
|
| 3 |
|
| 4 |
export function isHtmlElement(element: unknown): element is HTMLElement {
|
| 5 |
return element instanceof HTMLElement;
|
|
|
|
| 36 |
export function isPromise(value: unknown): value is Promise<unknown> {
|
| 37 |
return value instanceof Promise;
|
| 38 |
}
|
| 39 |
+
|
| 40 |
+
/** Runtime type guard for `number`, generated by typia. */
export const isNumber = typia.createIs<number>();
|
src/lib/utils/object.ts
CHANGED
|
@@ -32,3 +32,12 @@ export function pick<T extends Record<string, unknown>, K extends keyof T>(obj:
|
|
| 32 |
}
|
| 33 |
return result;
|
| 34 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
}
|
| 33 |
return result;
|
| 34 |
}
|
| 35 |
+
|
| 36 |
+
/**
 * Try to get a value from an object, or return undefined.
 * The key does not need to match the type of the object, so the
 * returned type is a union of all values, and undefined.
 *
 * Only the object's own properties are considered: a key such as
 * "toString" or "constructor" that exists only on the prototype chain
 * yields undefined instead of the inherited member.
 *
 * @param obj - Object to read from.
 * @param key - Property name to look up; it may or may not exist on `obj`.
 * @returns The own property value stored under `key`, or undefined when there is none.
 */
export function tryGet<T extends Record<string, unknown>>(obj: T, key: string): T[keyof T] | undefined {
	if (!Object.prototype.hasOwnProperty.call(obj, key)) {
		return undefined;
	}
	return obj[key as keyof T];
}
|
src/routes/+layout.svelte
CHANGED
|
@@ -6,6 +6,7 @@
|
|
| 6 |
import ShareModal from "$lib/components/share-modal.svelte";
|
| 7 |
import "../app.css";
|
| 8 |
import { session } from "$lib/state/session.svelte";
|
|
|
|
| 9 |
|
| 10 |
interface Props {
|
| 11 |
children?: import("svelte").Snippet;
|
|
@@ -13,6 +14,7 @@
|
|
| 13 |
|
| 14 |
let { children }: Props = $props();
|
| 15 |
session.init();
|
|
|
|
| 16 |
</script>
|
| 17 |
|
| 18 |
{@render children?.()}
|
|
|
|
| 6 |
import ShareModal from "$lib/components/share-modal.svelte";
|
| 7 |
import "../app.css";
|
| 8 |
import { session } from "$lib/state/session.svelte";
|
| 9 |
+
import { generationStats } from "$lib/state/generation-stats.svelte";
|
| 10 |
|
| 11 |
interface Props {
|
| 12 |
children?: import("svelte").Snippet;
|
|
|
|
| 14 |
|
| 15 |
let { children }: Props = $props();
|
| 16 |
session.init();
|
| 17 |
+
generationStats.init();
|
| 18 |
</script>
|
| 19 |
|
| 20 |
{@render children?.()}
|