add support for lepton (#1866)
Browse files### What problem does this PR solve?
add support for lepton
#1853
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
Co-authored-by: Zhedong Cen <[email protected]>
conf/llm_factories.json
CHANGED
@@ -2326,6 +2326,104 @@
|
|
2326 |
"model_type": "rerank"
|
2327 |
}
|
2328 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2329 |
}
|
2330 |
]
|
2331 |
}
|
|
|
2326 |
"model_type": "rerank"
|
2327 |
}
|
2328 |
]
|
2329 |
+
},
|
2330 |
+
{
|
2331 |
+
"name": "Lepton",
|
2332 |
+
"logo": "",
|
2333 |
+
"tags": "LLM",
|
2334 |
+
"status": "1",
|
2335 |
+
"llm": [
|
2336 |
+
{
|
2337 |
+
"llm_name": "dolphin-mixtral-8x7b",
|
2338 |
+
"tags": "LLM,CHAT,32k",
|
2339 |
+
"max_tokens": 32768,
|
2340 |
+
"model_type": "chat"
|
2341 |
+
},
|
2342 |
+
{
|
2343 |
+
"llm_name": "gemma-7b",
|
2344 |
+
"tags": "LLM,CHAT,8k",
|
2345 |
+
"max_tokens": 8192,
|
2346 |
+
"model_type": "chat"
|
2347 |
+
},
|
2348 |
+
{
|
2349 |
+
"llm_name": "llama3-1-8b",
|
2350 |
+
"tags": "LLM,CHAT,4k",
|
2351 |
+
"max_tokens": 4096,
|
2352 |
+
"model_type": "chat"
|
2353 |
+
},
|
2354 |
+
{
|
2355 |
+
"llm_name": "llama3-8b",
|
2356 |
+
"tags": "LLM,CHAT,8K",
|
2357 |
+
"max_tokens": 8192,
|
2358 |
+
"model_type": "chat"
|
2359 |
+
},
|
2360 |
+
{
|
2361 |
+
"llm_name": "llama2-13b",
|
2362 |
+
"tags": "LLM,CHAT,4K",
|
2363 |
+
"max_tokens": 4096,
|
2364 |
+
"model_type": "chat"
|
2365 |
+
},
|
2366 |
+
{
|
2367 |
+
"llm_name": "llama3-1-70b",
|
2368 |
+
"tags": "LLM,CHAT,8k",
|
2369 |
+
"max_tokens": 8192,
|
2370 |
+
"model_type": "chat"
|
2371 |
+
},
|
2372 |
+
{
|
2373 |
+
"llm_name": "llama3-70b",
|
2374 |
+
"tags": "LLM,CHAT,8k",
|
2375 |
+
"max_tokens": 8192,
|
2376 |
+
"model_type": "chat"
|
2377 |
+
},
|
2378 |
+
{
|
2379 |
+
"llm_name": "llama3-1-405b",
|
2380 |
+
"tags": "LLM,CHAT,8k",
|
2381 |
+
"max_tokens": 8192,
|
2382 |
+
"model_type": "chat"
|
2383 |
+
},
|
2384 |
+
{
|
2385 |
+
"llm_name": "mistral-7b",
|
2386 |
+
"tags": "LLM,CHAT,8K",
|
2387 |
+
"max_tokens": 8192,
|
2388 |
+
"model_type": "chat"
|
2389 |
+
},
|
2390 |
+
{
|
2391 |
+
"llm_name": "mistral-8x7b",
|
2392 |
+
"tags": "LLM,CHAT,8K",
|
2393 |
+
"max_tokens": 8192,
|
2394 |
+
"model_type": "chat"
|
2395 |
+
},
|
2396 |
+
{
|
2397 |
+
"llm_name": "nous-hermes-llama2",
|
2398 |
+
"tags": "LLM,CHAT,4k",
|
2399 |
+
"max_tokens": 4096,
|
2400 |
+
"model_type": "chat"
|
2401 |
+
},
|
2402 |
+
{
|
2403 |
+
"llm_name": "openchat-3-5",
|
2404 |
+
"tags": "LLM,CHAT,4k",
|
2405 |
+
"max_tokens": 4096,
|
2406 |
+
"model_type": "chat"
|
2407 |
+
},
|
2408 |
+
{
|
2409 |
+
"llm_name": "toppy-m-7b",
|
2410 |
+
"tags": "LLM,CHAT,4k",
|
2411 |
+
"max_tokens": 4096,
|
2412 |
+
"model_type": "chat"
|
2413 |
+
},
|
2414 |
+
{
|
2415 |
+
"llm_name": "wizardlm-2-7b",
|
2416 |
+
"tags": "LLM,CHAT,32k",
|
2417 |
+
"max_tokens": 32768,
|
2418 |
+
"model_type": "chat"
|
2419 |
+
},
|
2420 |
+
{
|
2421 |
+
"llm_name": "wizardlm-2-8x22b",
|
2422 |
+
"tags": "LLM,CHAT,64K",
|
2423 |
+
"max_tokens": 65536,
|
2424 |
+
"model_type": "chat"
|
2425 |
+
}
|
2426 |
+
]
|
2427 |
}
|
2428 |
]
|
2429 |
}
|
rag/llm/__init__.py
CHANGED
@@ -83,7 +83,8 @@ ChatModel = {
|
|
83 |
"NVIDIA": NvidiaChat,
|
84 |
"LM-Studio": LmStudioChat,
|
85 |
"OpenAI-API-Compatible": OpenAI_APIChat,
|
86 |
-
"cohere": CoHereChat
|
|
|
87 |
}
|
88 |
|
89 |
|
|
|
83 |
"NVIDIA": NvidiaChat,
|
84 |
"LM-Studio": LmStudioChat,
|
85 |
"OpenAI-API-Compatible": OpenAI_APIChat,
|
86 |
+
"cohere": CoHereChat,
|
87 |
+
"LeptonAI": LeptonAIChat
|
88 |
}
|
89 |
|
90 |
|
rag/llm/chat_model.py
CHANGED
@@ -71,7 +71,7 @@ class Base(ABC):
|
|
71 |
total_tokens
|
72 |
+ num_tokens_from_string(resp.choices[0].delta.content)
|
73 |
)
|
74 |
-
if not hasattr(resp, "usage")
|
75 |
else resp.usage["total_tokens"]
|
76 |
)
|
77 |
if resp.choices[0].finish_reason == "length":
|
@@ -981,3 +981,10 @@ class CoHereChat(Base):
|
|
981 |
yield ans + "\n**ERROR**: " + str(e)
|
982 |
|
983 |
yield total_tokens
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
total_tokens
|
72 |
+ num_tokens_from_string(resp.choices[0].delta.content)
|
73 |
)
|
74 |
+
if not hasattr(resp, "usage") or not resp.usage
|
75 |
else resp.usage["total_tokens"]
|
76 |
)
|
77 |
if resp.choices[0].finish_reason == "length":
|
|
|
981 |
yield ans + "\n**ERROR**: " + str(e)
|
982 |
|
983 |
yield total_tokens
|
984 |
+
|
985 |
+
|
986 |
+
class LeptonAIChat(Base):
|
987 |
+
def __init__(self, key, model_name, base_url=None):
|
988 |
+
if not base_url:
|
989 |
+
base_url = os.path.join("https://"+model_name+".lepton.run","api","v1")
|
990 |
+
super().__init__(key, model_name, base_url)
|
web/src/assets/svg/llm/lepton.svg
ADDED
|
web/src/pages/user-setting/setting-model/constant.ts
CHANGED
@@ -23,7 +23,8 @@ export const IconMap = {
|
|
23 |
NVIDIA:'nvidia',
|
24 |
'LM-Studio':'lm-studio',
|
25 |
'OpenAI-API-Compatible':'openai-api',
|
26 |
-
'cohere':'cohere'
|
|
|
27 |
};
|
28 |
|
29 |
export const BedrockRegionList = [
|
|
|
23 |
NVIDIA:'nvidia',
|
24 |
'LM-Studio':'lm-studio',
|
25 |
'OpenAI-API-Compatible':'openai-api',
|
26 |
+
'cohere':'cohere',
|
27 |
+
'LeptonAI':'lepton'
|
28 |
};
|
29 |
|
30 |
export const BedrockRegionList = [
|