Commit 0d8e806
Parent(s): ad71c98
Implement model initialization and health check endpoints; add model download logic, error handling, and request validation
main.py CHANGED
@@ -1,8 +1,11 @@
 import os
+from typing import Optional
 from fastapi import FastAPI, HTTPException, status
 from pathlib import Path
 import logging
 import sys
+
+from pydantic import BaseModel
 from ctransformers import AutoModelForCausalLM
 BASE_DIR = Path("/app")
 MODEL_DIR = BASE_DIR / "models"
@@ -169,4 +172,86 @@ async def startup_event():
             "Check model_loading.log for details."
         )
     else:
-        logger.info("Application started successfully with model loaded")
+        logger.info("Application started successfully with model loaded")
+
+@app.on_event("startup")
+async def startup():
+    """Initialize the model during startup"""
+    global model
+
+    logger.info("Starting application...")
+
+    if not MODEL_PATH:
+        logger.error("MODEL_PATH is not defined in config")
+        return
+
+    # Try to download model if it doesn't exist
+    if not MODEL_PATH.exists():
+        try:
+            logger.info("Model not found, attempting download...")
+            download_model()
+        except Exception as e:
+            logger.error(f"Failed to download model: {str(e)}")
+            return
+
+    # Initialize model
+    model = initialize_model(MODEL_PATH)
+
+    if model is None:
+        logger.warning(
+            "Model failed to load. Service will start but /generate endpoint will be unavailable. "
+            "Check model_loading.log for details."
+        )
+    else:
+        logger.info("Application started successfully with model loaded")
+
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    return {
+        "status": "healthy",
+        "model_loaded": model is not None
+    }
+class GenerateRequest(BaseModel):
+    prompt: str
+    max_tokens: Optional[int] = 512
+    temperature: Optional[float] = 0.7
+@app.post("/generate")
+async def generate_text(request: GenerateRequest):
+    """Generate text from the model"""
+    if model is None:
+        raise HTTPException(
+            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+            detail="Model is not loaded. Please check server logs."
+        )
+
+    try:
+        # Generate response from the model
+        response = model(
+            request.prompt,
+            max_new_tokens=request.max_tokens,
+            temperature=request.temperature
+        )
+
+        return {
+            "generated_text": response,
+            "prompt": request.prompt,
+            "max_tokens": request.max_tokens,
+            "temperature": request.temperature
+        }
+    except Exception as e:
+        logger.error(f"Error generating text: {str(e)}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Error generating text: {str(e)}"
+        )
+
+# Optional shutdown event
+@app.on_event("shutdown")
+async def shutdown():
+    """Cleanup on shutdown"""
+    global model
+    if model is not None:
+        del model
+        model = None
+        logger.info("Model unloaded during shutdown")
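The startup handler calls download_model() and initialize_model(MODEL_PATH), which live elsewhere in main.py and are not part of this diff. Below is a minimal, purely illustrative sketch of how such helpers could fetch and load a GGUF model with huggingface_hub and ctransformers; the repo id, filename, and model_type are placeholder assumptions, not values taken from this Space.

# Illustrative sketch only: not the Space's actual download_model()/initialize_model().
import logging
from pathlib import Path

from ctransformers import AutoModelForCausalLM
from huggingface_hub import hf_hub_download

logger = logging.getLogger(__name__)

MODEL_DIR = Path("/app") / "models"
MODEL_REPO = "TheBloke/Llama-2-7B-Chat-GGUF"   # placeholder repo id, not from this Space
MODEL_FILE = "llama-2-7b-chat.Q4_K_M.gguf"     # placeholder filename, not from this Space
MODEL_PATH = MODEL_DIR / MODEL_FILE

def download_model() -> None:
    """Fetch the GGUF weights into MODEL_DIR if they are missing."""
    MODEL_DIR.mkdir(parents=True, exist_ok=True)
    hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir=MODEL_DIR)

def initialize_model(model_path: Path):
    """Load the GGUF file with ctransformers; return None so startup can degrade gracefully."""
    try:
        return AutoModelForCausalLM.from_pretrained(
            str(model_path),
            model_type="llama",        # assumption: a Llama-family GGUF model
            context_length=2048,
        )
    except Exception:
        logger.exception("Model initialization failed")
        return None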
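Once the Space is up, the new endpoints can be exercised with any HTTP client. A small sketch using the requests library, assuming the API is reachable at http://localhost:7860 (adjust the base URL and port to match your deployment):

import requests

BASE_URL = "http://localhost:7860"   # assumption: adjust to your Space's URL/port

# Health probe: reports whether the model finished loading.
health = requests.get(f"{BASE_URL}/health", timeout=10)
print(health.json())   # e.g. {'status': 'healthy', 'model_loaded': True}

# Text generation; the payload mirrors the GenerateRequest schema in the diff above.
payload = {"prompt": "Write a haiku about FastAPI.", "max_tokens": 128, "temperature": 0.7}
resp = requests.post(f"{BASE_URL}/generate", json=payload, timeout=120)
if resp.ok:
    print(resp.json()["generated_text"])
else:
    # 503 means the model is not loaded; 500 wraps generation errors.
    print(resp.status_code, resp.json().get("detail"))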