"""OpenAI-compatible chat-completions API serving a local DeepSeek model on CPU."""

import time
import uuid
from typing import List, Optional

import torch
from fastapi import FastAPI, HTTPException, Request
from pydantic import BaseModel, Field
from transformers import AutoModelForCausalLM, AutoTokenizer

# --- Configuration ---
MODEL_ID = "deepseek-ai/deepseek-coder-1.3b-instruct"
DEVICE = "cpu"

# --- Model and tokenizer loading (runs once, at module import) ---
print(f"Début du chargement du modèle : {MODEL_ID}")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map=DEVICE,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
print("Modèle et tokenizer chargés avec succès sur le CPU.")

# --- API application ---
app = FastAPI()


# --- Data models for OpenAI wire-format compatibility ---
class ChatMessage(BaseModel):
    # Expected values: "user" / "assistant" / "system" (not enforced here).
    role: str
    content: str


class ChatCompletionRequest(BaseModel):
    # 'model' is optional so clients that omit it don't get a 422.
    model: Optional[str] = None
    messages: List[ChatMessage]
    max_tokens: Optional[int] = 250


class ChatCompletionResponseChoice(BaseModel):
    index: int = 0
    message: ChatMessage
    finish_reason: str = "stop"


class ChatCompletionResponse(BaseModel):
    id: str
    object: str = "chat.completion"
    created: int
    model: str
    choices: List[ChatCompletionResponseChoice]


class ModelData(BaseModel):
    id: str
    object: str = "model"
    # BUGFIX: the original used `created: int = int(time.time())`, which is
    # evaluated once at import time — every /models response then reports the
    # server start time. default_factory re-evaluates per instance.
    created: int = Field(default_factory=lambda: int(time.time()))
    owned_by: str = "user"


class ModelList(BaseModel):
    object: str = "list"
    data: List[ModelData]


# --- API endpoints ---
# /models endpoint exists to satisfy OpenAI-client extensions that probe it.
@app.get("/models", response_model=ModelList)
async def list_models():
    """List the models available on this server."""
    return ModelList(data=[ModelData(id=MODEL_ID)])


@app.post("/chat/completions", response_model=ChatCompletionResponse)
async def create_chat_completion(request: ChatCompletionRequest):
    """Generate a completion for the last user message of the conversation.

    Raises:
        HTTPException: 400 when the request carries no trailing user message.
    """
    user_prompt = ""
    if request.messages and request.messages[-1].role == "user":
        user_prompt = request.messages[-1].content

    # BUGFIX: the original returned {"error": "No user prompt found"}, which
    # fails the declared response_model validation and surfaces as a 500.
    # Raise a clean client error instead.
    if not user_prompt:
        raise HTTPException(status_code=400, detail="No user prompt found")

    # NOTE(review): only the last user turn is forwarded to the model; prior
    # conversation history is intentionally(?) discarded — confirm with callers.
    messages_for_model = [{"role": "user", "content": user_prompt}]
    inputs = tokenizer.apply_chat_template(
        messages_for_model, add_generation_prompt=True, return_tensors="pt"
    ).to(DEVICE)

    # BUGFIX: an explicit `"max_tokens": null` in the request body passes
    # pydantic validation (the field is Optional) but model.generate rejects
    # max_new_tokens=None — fall back to the documented default of 250.
    max_new_tokens = request.max_tokens if request.max_tokens is not None else 250

    outputs = model.generate(
        inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.2,
        top_k=50,
        top_p=0.95,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens, skipping the echoed prompt.
    response_text = tokenizer.decode(
        outputs[0][len(inputs[0]):], skip_special_tokens=True
    )

    response_message = ChatMessage(role="assistant", content=response_text)
    choice = ChatCompletionResponseChoice(message=response_message)
    return ChatCompletionResponse(
        id=f"chatcmpl-{uuid.uuid4()}",
        created=int(time.time()),
        model=MODEL_ID,
        choices=[choice],
    )


@app.get("/")
def root():
    """Liveness / health-check endpoint."""
    return {"status": "API compatible OpenAI en ligne", "model_id": MODEL_ID}