黄腾 aopstudio committed on
Commit
fa9254d
·
1 Parent(s): df1b4ac

add support for lepton (#1866)

Browse files

### What problem does this PR solve?

add support for lepton
#1853

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Co-authored-by: Zhedong Cen <[email protected]>

conf/llm_factories.json CHANGED
@@ -2326,6 +2326,104 @@
2326
  "model_type": "rerank"
2327
  }
2328
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2329
  }
2330
  ]
2331
  }
 
2326
  "model_type": "rerank"
2327
  }
2328
  ]
2329
+ },
2330
+ {
2331
+ "name": "Lepton",
2332
+ "logo": "",
2333
+ "tags": "LLM",
2334
+ "status": "1",
2335
+ "llm": [
2336
+ {
2337
+ "llm_name": "dolphin-mixtral-8x7b",
2338
+ "tags": "LLM,CHAT,32k",
2339
+ "max_tokens": 32768,
2340
+ "model_type": "chat"
2341
+ },
2342
+ {
2343
+ "llm_name": "gemma-7b",
2344
+ "tags": "LLM,CHAT,8k",
2345
+ "max_tokens": 8192,
2346
+ "model_type": "chat"
2347
+ },
2348
+ {
2349
+ "llm_name": "llama3-1-8b",
2350
+ "tags": "LLM,CHAT,4k",
2351
+ "max_tokens": 4096,
2352
+ "model_type": "chat"
2353
+ },
2354
+ {
2355
+ "llm_name": "llama3-8b",
2356
+ "tags": "LLM,CHAT,8K",
2357
+ "max_tokens": 8192,
2358
+ "model_type": "chat"
2359
+ },
2360
+ {
2361
+ "llm_name": "llama2-13b",
2362
+ "tags": "LLM,CHAT,4K",
2363
+ "max_tokens": 4096,
2364
+ "model_type": "chat"
2365
+ },
2366
+ {
2367
+ "llm_name": "llama3-1-70b",
2368
+ "tags": "LLM,CHAT,8k",
2369
+ "max_tokens": 8192,
2370
+ "model_type": "chat"
2371
+ },
2372
+ {
2373
+ "llm_name": "llama3-70b",
2374
+ "tags": "LLM,CHAT,8k",
2375
+ "max_tokens": 8192,
2376
+ "model_type": "chat"
2377
+ },
2378
+ {
2379
+ "llm_name": "llama3-1-405b",
2380
+ "tags": "LLM,CHAT,8k",
2381
+ "max_tokens": 8192,
2382
+ "model_type": "chat"
2383
+ },
2384
+ {
2385
+ "llm_name": "mistral-7b",
2386
+ "tags": "LLM,CHAT,8K",
2387
+ "max_tokens": 8192,
2388
+ "model_type": "chat"
2389
+ },
2390
+ {
2391
+ "llm_name": "mistral-8x7b",
2392
+ "tags": "LLM,CHAT,8K",
2393
+ "max_tokens": 8192,
2394
+ "model_type": "chat"
2395
+ },
2396
+ {
2397
+ "llm_name": "nous-hermes-llama2",
2398
+ "tags": "LLM,CHAT,4k",
2399
+ "max_tokens": 4096,
2400
+ "model_type": "chat"
2401
+ },
2402
+ {
2403
+ "llm_name": "openchat-3-5",
2404
+ "tags": "LLM,CHAT,4k",
2405
+ "max_tokens": 4096,
2406
+ "model_type": "chat"
2407
+ },
2408
+ {
2409
+ "llm_name": "toppy-m-7b",
2410
+ "tags": "LLM,CHAT,4k",
2411
+ "max_tokens": 4096,
2412
+ "model_type": "chat"
2413
+ },
2414
+ {
2415
+ "llm_name": "wizardlm-2-7b",
2416
+ "tags": "LLM,CHAT,32k",
2417
+ "max_tokens": 32768,
2418
+ "model_type": "chat"
2419
+ },
2420
+ {
2421
+ "llm_name": "wizardlm-2-8x22b",
2422
+ "tags": "LLM,CHAT,64K",
2423
+ "max_tokens": 65536,
2424
+ "model_type": "chat"
2425
+ }
2426
+ ]
2427
  }
2428
  ]
2429
  }
rag/llm/__init__.py CHANGED
@@ -83,7 +83,8 @@ ChatModel = {
83
  "NVIDIA": NvidiaChat,
84
  "LM-Studio": LmStudioChat,
85
  "OpenAI-API-Compatible": OpenAI_APIChat,
86
- "cohere": CoHereChat
 
87
  }
88
 
89
 
 
83
  "NVIDIA": NvidiaChat,
84
  "LM-Studio": LmStudioChat,
85
  "OpenAI-API-Compatible": OpenAI_APIChat,
86
+ "cohere": CoHereChat,
87
+ "LeptonAI": LeptonAIChat
88
  }
89
 
90
 
rag/llm/chat_model.py CHANGED
@@ -71,7 +71,7 @@ class Base(ABC):
71
  total_tokens
72
  + num_tokens_from_string(resp.choices[0].delta.content)
73
  )
74
- if not hasattr(resp, "usage")
75
  else resp.usage["total_tokens"]
76
  )
77
  if resp.choices[0].finish_reason == "length":
@@ -981,3 +981,10 @@ class CoHereChat(Base):
981
  yield ans + "\n**ERROR**: " + str(e)
982
 
983
  yield total_tokens
 
 
 
 
 
 
 
 
71
  total_tokens
72
  + num_tokens_from_string(resp.choices[0].delta.content)
73
  )
74
+ if not hasattr(resp, "usage") or not resp.usage
75
  else resp.usage["total_tokens"]
76
  )
77
  if resp.choices[0].finish_reason == "length":
 
981
  yield ans + "\n**ERROR**: " + str(e)
982
 
983
  yield total_tokens
984
+
985
+
986
+ class LeptonAIChat(Base):
987
+ def __init__(self, key, model_name, base_url=None):
988
+ if not base_url:
989
+ base_url = os.path.join("https://"+model_name+".lepton.run","api","v1")
990
+ super().__init__(key, model_name, base_url)
web/src/assets/svg/llm/lepton.svg ADDED
web/src/pages/user-setting/setting-model/constant.ts CHANGED
@@ -23,7 +23,8 @@ export const IconMap = {
23
  NVIDIA:'nvidia',
24
  'LM-Studio':'lm-studio',
25
  'OpenAI-API-Compatible':'openai-api',
26
- 'cohere':'cohere'
 
27
  };
28
 
29
  export const BedrockRegionList = [
 
23
  NVIDIA:'nvidia',
24
  'LM-Studio':'lm-studio',
25
  'OpenAI-API-Compatible':'openai-api',
26
+ 'cohere':'cohere',
27
+ 'LeptonAI':'lepton'
28
  };
29
 
30
  export const BedrockRegionList = [