Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -45,12 +45,12 @@ class ChatCompletionResponse(BaseModel):
|
|
45 |
choices: list[ChatCompletionResponseChoice]
|
46 |
|
47 |
# --- Définition de l'API compatible OpenAI ---
|
48 |
-
|
|
|
49 |
async def create_chat_completion(request: ChatCompletionRequest):
|
50 |
"""
|
51 |
Endpoint compatible avec l'API OpenAI Chat Completions.
|
52 |
"""
|
53 |
-
# Extraire le dernier message utilisateur pour le prompt
|
54 |
user_prompt = ""
|
55 |
if request.messages and request.messages[-1].role == "user":
|
56 |
user_prompt = request.messages[-1].content
|
@@ -58,17 +58,13 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|
58 |
if not user_prompt:
|
59 |
return {"error": "No user prompt found"}
|
60 |
|
61 |
-
# Préparation des inputs pour le modèle DeepSeek
|
62 |
messages_for_model = [{'role': 'user', 'content': user_prompt}]
|
63 |
inputs = tokenizer.apply_chat_template(messages_for_model, add_generation_prompt=True, return_tensors="pt").to(DEVICE)
|
64 |
|
65 |
-
# Génération
|
66 |
outputs = model.generate(inputs, max_new_tokens=request.max_tokens, do_sample=True, temperature=0.2, top_k=50, top_p=0.95, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
|
67 |
|
68 |
-
# Décodage
|
69 |
response_text = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
|
70 |
|
71 |
-
# Formatage de la réponse au format OpenAI
|
72 |
response_message = ChatMessage(role="assistant", content=response_text)
|
73 |
choice = ChatCompletionResponseChoice(message=response_message)
|
74 |
completion_response = ChatCompletionResponse(
|
|
|
45 |
choices: list[ChatCompletionResponseChoice]
|
46 |
|
47 |
# --- Définition de l'API compatible OpenAI ---
|
48 |
+
# LA SEULE LIGNE QUI CHANGE EST CELLE-CI : on a enlevé le "/v1"
|
49 |
+
@app.post("/chat/completions")
|
50 |
async def create_chat_completion(request: ChatCompletionRequest):
|
51 |
"""
|
52 |
Endpoint compatible avec l'API OpenAI Chat Completions.
|
53 |
"""
|
|
|
54 |
user_prompt = ""
|
55 |
if request.messages and request.messages[-1].role == "user":
|
56 |
user_prompt = request.messages[-1].content
|
|
|
58 |
if not user_prompt:
|
59 |
return {"error": "No user prompt found"}
|
60 |
|
|
|
61 |
messages_for_model = [{'role': 'user', 'content': user_prompt}]
|
62 |
inputs = tokenizer.apply_chat_template(messages_for_model, add_generation_prompt=True, return_tensors="pt").to(DEVICE)
|
63 |
|
|
|
64 |
outputs = model.generate(inputs, max_new_tokens=request.max_tokens, do_sample=True, temperature=0.2, top_k=50, top_p=0.95, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
|
65 |
|
|
|
66 |
response_text = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
|
67 |
|
|
|
68 |
response_message = ChatMessage(role="assistant", content=response_text)
|
69 |
choice = ChatCompletionResponseChoice(message=response_message)
|
70 |
completion_response = ChatCompletionResponse(
|