Commit
·
4efb547
1
Parent(s):
46ac909
Update main.py
Browse files
main.py
CHANGED
@@ -53,7 +53,7 @@ class Message(BaseModel):
|
|
53 |
|
54 |
class ChatCompletionRequest(BaseModel):
|
55 |
messages: List[Message]
|
56 |
- max_tokens: int =
|
57 |
|
58 |
@app.post("/v1/completions")
|
59 |
async def completion(request: ChatCompletionRequest, response_mode=None):
|
@@ -66,7 +66,7 @@ async def chat(request: ChatCompletionRequest):
|
|
66 |
tokens = llm.tokenize(combined_messages)
|
67 |
|
68 |
try:
|
69 |
- chat_chunks = llm.generate(tokens,
|
70 |
except Exception as e:
|
71 |
raise HTTPException(status_code=500, detail=str(e))
|
72 |
|
|
|
53 |
|
54 |
class ChatCompletionRequest(BaseModel):
|
55 |
messages: List[Message]
|
56 |
+ max_tokens: int = 250
|
57 |
|
58 |
@app.post("/v1/completions")
|
59 |
async def completion(request: ChatCompletionRequest, response_mode=None):
|
|
|
66 |
tokens = llm.tokenize(combined_messages)
|
67 |
|
68 |
try:
|
69 |
+ chat_chunks = llm.generate(tokens, max_new_tokens=request.max_tokens)
|
70 |
except Exception as e:
|
71 |
raise HTTPException(status_code=500, detail=str(e))
|
72 |
|