from fastapi import FastAPI, HTTPException, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional, Dict, Any
import json
import base64
from PIL import Image
from io import BytesIO
import uvicorn

from app import llm_client

# Create FastAPI application
api_app = FastAPI(
    title="LLM Structured Output API",
    description="API for generating structured responses from local GGUF models via llama-cpp-python",
    version="1.0.0"
)

# Setup CORS
api_app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Data models for the API
class StructuredOutputRequest(BaseModel):
    prompt: str
    json_schema: Dict[str, Any]
    image_base64: Optional[str] = None
    use_grammar: bool = True


class StructuredOutputResponse(BaseModel):
    success: bool
    data: Optional[Dict[str, Any]] = None
    error: Optional[str] = None
    raw_response: Optional[str] = None


def decode_base64_image(base64_string: str) -> Image.Image:
    """Decode a base64 string into a PIL Image."""
    try:
        image_data = base64.b64decode(base64_string)
        image = Image.open(BytesIO(image_data))
        return image
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Image decoding error: {str(e)}")


@api_app.post("/generate", response_model=StructuredOutputResponse)
async def generate_structured_output(request: StructuredOutputRequest):
    """
    Main endpoint for generating a structured response.

    Args:
        request: Request containing the prompt, JSON schema and, optionally, a base64-encoded image

    Returns:
        StructuredOutputResponse: Structured response or error
    """
    # Check model initialization
    if llm_client is None:
        raise HTTPException(
            status_code=503,
            detail="LLM model not initialized. Check server configuration."
        )

    try:
        # Validate input data
        if not request.prompt.strip():
            raise HTTPException(status_code=400, detail="Prompt cannot be empty")

        if not request.json_schema:
            raise HTTPException(status_code=400, detail="JSON schema cannot be empty")

        # Decode image if provided
        image = None
        if request.image_base64:
            image = decode_base64_image(request.image_base64)

        # Generate response
        result = llm_client.generate_structured_response(
            prompt=request.prompt,
            json_schema=request.json_schema,
            image=image,
            use_grammar=request.use_grammar
        )

        # Format response
        if "error" in result:
            return StructuredOutputResponse(
                success=False,
                error=result["error"],
                raw_response=result.get("raw_response")
            )
        else:
            return StructuredOutputResponse(
                success=True,
                data=result.get("data"),
                raw_response=result.get("raw_response")
            )

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")


@api_app.post("/generate_with_file", response_model=StructuredOutputResponse)
async def generate_with_file(
    prompt: str = Form(...),
    json_schema: str = Form(...),
    image: Optional[UploadFile] = File(None),
    use_grammar: bool = Form(True)
):
    """
    Alternative endpoint that accepts the image as an uploaded file.

    Args:
        prompt: Text prompt
        json_schema: JSON schema as a string
        image: Uploaded image file
        use_grammar: Whether to use grammar-based structured output

    Returns:
        StructuredOutputResponse: Structured response or error
    """
    # Check model initialization
    if llm_client is None:
        raise HTTPException(
            status_code=503,
            detail="LLM model not initialized. Check server configuration."
        )

    try:
        # Validate input data
        if not prompt.strip():
            raise HTTPException(status_code=400, detail="Prompt cannot be empty")

        if not json_schema.strip():
            raise HTTPException(status_code=400, detail="JSON schema cannot be empty")

        # Parse JSON schema
        try:
            parsed_schema = json.loads(json_schema)
        except json.JSONDecodeError as e:
            raise HTTPException(status_code=400, detail=f"Invalid JSON schema: {str(e)}")

        # Process image if provided
        pil_image = None
        if image:
            # Check file type (content_type may be missing, so guard against None)
            if not image.content_type or not image.content_type.startswith("image/"):
                raise HTTPException(status_code=400, detail="Uploaded file must be an image")

            # Read and convert image
            image_data = await image.read()
            pil_image = Image.open(BytesIO(image_data))

        # Generate response
        result = llm_client.generate_structured_response(
            prompt=prompt,
            json_schema=parsed_schema,
            image=pil_image,
            use_grammar=use_grammar
        )

        # Format response
        if "error" in result:
            return StructuredOutputResponse(
                success=False,
                error=result["error"],
                raw_response=result.get("raw_response")
            )
        else:
            return StructuredOutputResponse(
                success=True,
                data=result.get("data"),
                raw_response=result.get("raw_response")
            )

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")


@api_app.get("/health")
async def health_check():
    """API health check."""
    model_status = "loaded" if llm_client is not None else "not_loaded"

    return {
        "status": "healthy" if llm_client is not None else "degraded",
        "model_status": model_status,
        "message": "API is working correctly" if llm_client is not None else "API is working, but model is not loaded"
    }


@api_app.get("/")
async def root():
    """Root endpoint with API information."""
    return {
        "message": "LLM Structured Output API",
        "version": "1.0.0",
        "model_loaded": llm_client is not None,
        "endpoints": {
            "/generate": "POST - main endpoint for generating a structured response",
            "/generate_with_file": "POST - endpoint with image file upload",
            "/health": "GET - health check",
            "/docs": "GET - automatic Swagger documentation"
        }
    }


if __name__ == "__main__":
    from config import Config

    uvicorn.run(
        "api:api_app",
        host=Config.HOST,
        port=Config.API_PORT,
        reload=True
    )
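

# Example client call for the /generate endpoint (illustrative sketch only; the base
# URL assumes default local settings such as http://localhost:8000 -- the real host
# and port come from Config.HOST and Config.API_PORT, and the schema is just a sample):
#
#   import requests
#
#   payload = {
#       "prompt": "Extract the product name and price from the description.",
#       "json_schema": {
#           "type": "object",
#           "properties": {"name": {"type": "string"}, "price": {"type": "number"}},
#           "required": ["name", "price"],
#       },
#       "use_grammar": True,
#   }
#   response = requests.post("http://localhost:8000/generate", json=payload)
#   print(response.json())  # -> {"success": true, "data": {...}, "raw_response": "..."}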