Commit 23e1d87 (parent: 0d8e806)
Refactor model initialization logic; improve error handling and logging for better diagnostics

main.py CHANGED
@@ -1,57 +1,24 @@
  import os
- from typing import Optional
  from fastapi import FastAPI, HTTPException, status
  from pathlib import Path
  import logging
  import sys
-
- from pydantic import BaseModel
  from ctransformers import AutoModelForCausalLM
  BASE_DIR = Path("/app")
  MODEL_DIR = BASE_DIR / "models"
  MODEL_NAME = "llama-2-7b-chat.q4_K_M.gguf"
  MODEL_PATH = MODEL_DIR / MODEL_NAME
- # Create FastAPI app
- app = FastAPI(title="LLM API Server")
- # Ensure model directory exists
- MODEL_DIR.mkdir(parents=True, exist_ok=True)
- # Model download URL
  MODEL_URL = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf"
-
-
- import requests
- from tqdm import tqdm
-
- if MODEL_PATH.exists():
-     logger.info(f"Model already exists at {MODEL_PATH}")
-     return
-
- logger.info(f"Downloading model to {MODEL_PATH}")
- try:
-     response = requests.get(MODEL_URL, stream=True)
-     response.raise_for_status()
-     total_size = int(response.headers.get('content-length', 0))
-
-     with open(MODEL_PATH, 'wb') as file, tqdm(
-         desc="Downloading",
-         total=total_size,
-         unit='iB',
-         unit_scale=True,
-         unit_divisor=1024,
-     ) as pbar:
-         for data in response.iter_content(chunk_size=1024):
-             size = file.write(data)
-             pbar.update(size)
-
-     logger.info("Model downloaded successfully")
- except Exception as e:
-     logger.error(f"Error downloading model: {str(e)}")
-     if MODEL_PATH.exists():
-         MODEL_PATH.unlink()
-     raise
- # Enhanced logging configuration
  logging.basicConfig(
-     level=logging.DEBUG,
      format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
      handlers=[
          logging.StreamHandler(sys.stdout),
@@ -60,198 +27,245 @@ logging.basicConfig(
  )
  logger = logging.getLogger(__name__)

-
-
-
- ""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-     # Check file size (should be at least 1MB)
-     if model_path.stat().st_size < 1_000_000:
-         logger.error(f"Model file seems too small: {model_path.stat().st_size} bytes")
-         return False
-
-     return True
- except Exception as e:
-     logger.error(f"Error verifying model path: {str(e)}")
-     return False

-
-
-
-

-
-
-
-
-
-
-
-
-
-

- #
-
-         str(model_path.parent),
-         model_file=model_path.name,
-         model_type="llama",
-         max_new_tokens=512,
-         context_length=512,
-         gpu_layers=0  # CPU only
-     )

- #
-
-
-
-
-
-
-
-
-
-
-

-
-

-
-     logger.error(f"Failed to import required modules: {str(e)}")
-     logger.error("Please ensure ctransformers is installed: pip install ctransformers")
-     return None
- except RuntimeError as e:
-     logger.error(f"Runtime error during model initialization: {str(e)}")
-     return None
- except Exception as e:
-     logger.error(f"Unexpected error during model initialization: {str(e)}")
-     logger.exception("Full traceback:")
-     return None

-
-
-
-

-

-
-
-

-
-
  try:
-
-
  except Exception as e:
-     logger.error(f"
-
-
-
-
-
-
-
-
  )
- else:
-     logger.info("Application started successfully with model loaded")

  @app.on_event("startup")
  async def startup():
      """Initialize the model during startup"""
-
-
-     logger.info("Starting application...")
-
-     if not MODEL_PATH:
-         logger.error("MODEL_PATH is not defined in config")
-         return
-
-     # Try to download model if it doesn't exist
-     if not MODEL_PATH.exists():
-         try:
-             logger.info("Model not found, attempting download...")
-             download_model()
-         except Exception as e:
-             logger.error(f"Failed to download model: {str(e)}")
-             return
-
-     # Initialize model
-     model = initialize_model(MODEL_PATH)
-
-     if model is None:
-         logger.warning(
-             "Model failed to load. Service will start but /generate endpoint will be unavailable. "
-             "Check model_loading.log for details."
-         )
-     else:
-         logger.info("Application started successfully with model loaded")

  @app.get("/health")
  async def health_check():
      """Health check endpoint"""
      return {
          "status": "healthy",
-         "model_loaded": model is not None
      }
-
-     prompt: str
-     max_tokens: Optional[int] = 512
-     temperature: Optional[float] = 0.7
  @app.post("/generate")
  async def generate_text(request: GenerateRequest):
-     """Generate text
-
-     raise HTTPException(
-         status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
-         detail="Model is not loaded. Please check server logs."
-     )
-
-     try:
-         # Generate response from the model
-         response = model(
-             request.prompt,
-             max_new_tokens=request.max_tokens,
-             temperature=request.temperature
-         )
-
-         return {
-             "generated_text": response,
-             "prompt": request.prompt,
-             "max_tokens": request.max_tokens,
-             "temperature": request.temperature
-         }
-     except Exception as e:
-         logger.error(f"Error generating text: {str(e)}")
-         raise HTTPException(
-             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-             detail=f"Error generating text: {str(e)}"
-         )

- # Optional shutdown event
  @app.on_event("shutdown")
  async def shutdown():
      """Cleanup on shutdown"""
-
-
-     del model
-     model = None
-     logger.info("Model unloaded during shutdown")
  import os
+ from typing import Optional, Dict, Any, Literal
+ from enum import Enum
  from fastapi import FastAPI, HTTPException, status
  from pathlib import Path
  import logging
  import sys
+ from pydantic import BaseModel, Field, validator
  from ctransformers import AutoModelForCausalLM
+ from dataclasses import dataclass
+
+ # Constants
  BASE_DIR = Path("/app")
  MODEL_DIR = BASE_DIR / "models"
  MODEL_NAME = "llama-2-7b-chat.q4_K_M.gguf"
  MODEL_PATH = MODEL_DIR / MODEL_NAME
  MODEL_URL = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf"
+
+ # Logging configuration
  logging.basicConfig(
+     level=logging.DEBUG,
      format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
      handlers=[
          logging.StreamHandler(sys.stdout),
  )
  logger = logging.getLogger(__name__)

+ # Aligned with frontend enums
+ class PoemStyle(str, Enum):
+     SONNET = "sonnet"
+     HAIKU = "haiku"
+     FREE_VERSE = "free-verse"
+     VILLANELLE = "villanelle"
+
+ class EmotionalTone(str, Enum):
+     CONTEMPLATIVE = "contemplative"
+     JOYFUL = "joyful"
+     MELANCHOLIC = "melancholic"
+     ROMANTIC = "romantic"
+
+ class Length(str, Enum):
+     SHORT = "short"    # 100 words
+     MEDIUM = "medium"  # 200 words
+     LONG = "long"      # 300 words

+ @dataclass
+ class StyleConfig:
+     """Maps style parameters to model parameters"""
+     temperature: float
+     top_p: float
+     top_k: int
+     repetition_penalty: float
+     max_tokens: int
+
+ class StyleMapper:
+     """Maps style preferences to model parameters"""

+     @staticmethod
+     def get_style_config(
+         style: PoemStyle,
+         emotional_tone: EmotionalTone,
+         creative_style: float,    # 0-100
+         language_variety: float,  # 0-1
+         length: Length,
+         word_repetition: float,   # 1-2
+     ) -> StyleConfig:
+         # Base configuration
+         config = {
+             "temperature": 0.7,
+             "top_p": 0.9,
+             "top_k": 40,
+             "repetition_penalty": 1.1,
+             "max_tokens": 512
+         }

+         # Map creative_style (0-100) to temperature (0.5-1.0)
+         config["temperature"] = 0.5 + (creative_style / 100) * 0.5

+         # Map length to tokens (assuming average word is 5 tokens)
+         length_token_map = {
+             Length.SHORT: 500,    # ~100 words
+             Length.MEDIUM: 1000,  # ~200 words
+             Length.LONG: 1500,    # ~300 words
+         }
+         config["max_tokens"] = length_token_map[length]
+
+         # Map language_variety (0-1) to top_p
+         config["top_p"] = 0.7 + (language_variety * 0.3)
+
+         # Map word_repetition (1-2) to repetition_penalty
+         config["repetition_penalty"] = word_repetition
+
+         # Adjust based on emotional tone
+         tone_temp_adjustment = {
+             EmotionalTone.CONTEMPLATIVE: 0.0,
+             EmotionalTone.JOYFUL: 0.1,
+             EmotionalTone.MELANCHOLIC: -0.1,
+             EmotionalTone.ROMANTIC: 0.2
+         }
+         config["temperature"] += tone_temp_adjustment[emotional_tone]

+         # Clamp temperature between 0.5 and 1.0
+         config["temperature"] = max(0.5, min(1.0, config["temperature"]))

+         return StyleConfig(**config)

+ class GenerateRequest(BaseModel):
+     prompt: str
+     style: PoemStyle
+     emotionalTone: EmotionalTone
+     creativeStyle: float = Field(ge=0, le=100)   # 0-100 slider
+     languageVariety: float = Field(ge=0, le=1)   # 0-1 slider
+     length: Length
+     wordRepetition: float = Field(ge=1, le=2)    # 1-2 slider
+
+     @validator('creativeStyle')
+     def validate_creative_style(cls, v):
+         if not 0 <= v <= 100:
+             raise ValueError('creativeStyle must be between 0 and 100')
+         return v
+
+     @validator('languageVariety')
+     def validate_language_variety(cls, v):
+         if not 0 <= v <= 1:
+             raise ValueError('languageVariety must be between 0 and 1')
+         return v
+
+     @validator('wordRepetition')
+     def validate_word_repetition(cls, v):
+         if not 1 <= v <= 2:
+             raise ValueError('wordRepetition must be between 1 and 2')
+         return v
+
+ class ModelManager:
+     def __init__(self):
+         self.model = None

+     async def initialize(self):
+         """Initialize the model with error handling"""
+         if not MODEL_PATH.exists():
+             await self.download_model()
+
+         self.model = self.initialize_model(MODEL_PATH)
+         return self.model is not None

+     @staticmethod
+     async def download_model():
+         """Download the model if it doesn't exist"""
+         import requests
+         from tqdm import tqdm

+         if MODEL_PATH.exists():
+             return
+
+         logger.info(f"Downloading model to {MODEL_PATH}")
          try:
+             response = requests.get(MODEL_URL, stream=True)
+             response.raise_for_status()
+             total_size = int(response.headers.get('content-length', 0))
+
+             with open(MODEL_PATH, 'wb') as file, tqdm(
+                 desc="Downloading",
+                 total=total_size,
+                 unit='iB',
+                 unit_scale=True,
+                 unit_divisor=1024,
+             ) as pbar:
+                 for data in response.iter_content(chunk_size=1024):
+                     size = file.write(data)
+                     pbar.update(size)
          except Exception as e:
+             logger.error(f"Error downloading model: {str(e)}")
+             if MODEL_PATH.exists():
+                 MODEL_PATH.unlink()
+             raise
+
+     def initialize_model(self, model_path: Path):
+         """Initialize the model with the specified configuration"""
+         try:
+             model = AutoModelForCausalLM.from_pretrained(
+                 str(model_path.parent),
+                 model_file=model_path.name,
+                 model_type="llama",
+                 max_new_tokens=1500,  # Support for longest poems
+                 context_length=2048,
+                 gpu_layers=0
+             )
+             return model
+         except Exception as e:
+             logger.error(f"Error initializing model: {str(e)}")
+             return None

+     def generate(self, request: GenerateRequest) -> Dict[str, Any]:
+         """Generate text based on the request and style parameters"""
+         if self.model is None:
+             raise HTTPException(
+                 status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+                 detail="Model not loaded"
+             )
+
+         # Get style configuration
+         style_config = StyleMapper.get_style_config(
+             request.style,
+             request.emotionalTone,
+             request.creativeStyle,
+             request.languageVariety,
+             request.length,
+             request.wordRepetition
          )

+         try:
+             # Prepare prompt based on style
+             style_prompts = {
+                 PoemStyle.SONNET: "Write a sonnet about",
+                 PoemStyle.HAIKU: "Write a haiku about",
+                 PoemStyle.FREE_VERSE: "Write a free verse poem about",
+                 PoemStyle.VILLANELLE: "Write a villanelle about"
+             }
+
+             styled_prompt = f"{style_prompts[request.style]} {request.prompt}"
+
+             response = self.model(
+                 styled_prompt,
+                 max_new_tokens=style_config.max_tokens,
+                 temperature=style_config.temperature,
+                 top_p=style_config.top_p,
+                 top_k=style_config.top_k,
+                 repetition_penalty=style_config.repetition_penalty
+             )
+
+             return {
+                 "generated_text": response,
+                 "prompt": styled_prompt,
+                 "style_config": style_config.__dict__
+             }
+         except Exception as e:
+             logger.error(f"Error generating text: {str(e)}")
+             raise HTTPException(
+                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                 detail=str(e)
+             )
+
+ # Create FastAPI app and model manager
+ app = FastAPI(title="Poetry Generation API")
+ model_manager = ModelManager()
+
  @app.on_event("startup")
  async def startup():
      """Initialize the model during startup"""
+     await model_manager.initialize()

  @app.get("/health")
  async def health_check():
      """Health check endpoint"""
      return {
          "status": "healthy",
+         "model_loaded": model_manager.model is not None
      }
+
  @app.post("/generate")
  async def generate_text(request: GenerateRequest):
+     """Generate text with style parameters"""
+     return model_manager.generate(request)

  @app.on_event("shutdown")
  async def shutdown():
      """Cleanup on shutdown"""
+     if model_manager.model is not None:
+         del model_manager.model
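A quick worked example of the new StyleMapper logic (a sketch, not part of the commit; it assumes the file above is importable as `main`): a creativeStyle of 80 gives a base temperature of 0.5 + 0.8 * 0.5 = 0.9, the romantic tone adds 0.2, and the clamp then caps it at 1.0.

# Hypothetical sanity check for StyleMapper.get_style_config,
# assuming main.py above is on the import path.
from main import StyleMapper, PoemStyle, EmotionalTone, Length

cfg = StyleMapper.get_style_config(
    style=PoemStyle.SONNET,
    emotional_tone=EmotionalTone.ROMANTIC,
    creative_style=80,      # base temperature: 0.5 + 0.8 * 0.5 = 0.9
    language_variety=0.5,   # top_p: 0.7 + 0.5 * 0.3 = 0.85
    length=Length.MEDIUM,   # max_tokens: 1000
    word_repetition=1.2,    # repetition_penalty: 1.2
)
assert cfg.temperature == 1.0          # 0.9 + 0.2 romantic bonus, clamped to 1.0
assert abs(cfg.top_p - 0.85) < 1e-9
assert cfg.max_tokens == 1000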
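The request schema enforces the slider ranges twice: once via Field(ge=..., le=...) and again via the explicit validators. A minimal sketch of a valid payload and how an out-of-range value is rejected (pydantic v1 style, matching the `validator` import above):

# Hypothetical validation check; assumes main.py above is importable.
from pydantic import ValidationError
from main import GenerateRequest

req = GenerateRequest(
    prompt="the sea at dusk",
    style="sonnet",              # plain strings coerce into the str-based enums
    emotionalTone="melancholic",
    creativeStyle=65,
    languageVariety=0.4,
    length="medium",
    wordRepetition=1.3,
)

try:
    GenerateRequest(**{**req.dict(), "creativeStyle": 150})  # out of range
except ValidationError as exc:
    print(exc)  # reports creativeStyle outside the 0-100 bound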
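And an end-to-end call against the running service (the host and port are assumptions, e.g. after `uvicorn main:app --port 8000`; the JSON keys mirror GenerateRequest):

# Hypothetical client call; the server address is an assumption.
import requests

resp = requests.post(
    "http://localhost:8000/generate",
    json={
        "prompt": "a city waking up in winter",
        "style": "free-verse",
        "emotionalTone": "contemplative",
        "creativeStyle": 40,
        "languageVariety": 0.8,
        "length": "short",
        "wordRepetition": 1.1,
    },
    timeout=300,  # CPU-only generation (gpu_layers=0) can be slow
)
resp.raise_for_status()
print(resp.json()["generated_text"])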