Update main.py

main.py CHANGED
@@ -26,7 +26,8 @@ class Config:
     MAX_QUEUE_SIZE = 16  # Maximum number of requests to queue
     QUANTIZE_MODEL = True  # Enable quantization for improved performance
     WARMUP_INPUTS = True  # Pre-warm the model with sample inputs
-
+    # Use environment-specific log directory or default to a temp directory
+    LOG_DIR = os.environ.get('LOG_DIR', '/tmp/poetry_logs')
     ENABLE_PROFILING = False  # Set to True to enable performance profiling
     REQUEST_TIMEOUT = 30.0  # Timeout for request processing in seconds
 
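
Reading LOG_DIR from the environment is the standard override pattern: a deployment can point logs at a writable volume without touching code, and the /tmp default keeps the Space booting on hosts where the app directory is read-only. A minimal sketch of the behaviour, assuming only the Config.LOG_DIR line from the hunk above (everything else here is illustrative):

import os

# Simulate the deployment environment before the class body runs;
# class attributes are evaluated once, at definition time.
os.environ['LOG_DIR'] = '/data/logs'  # e.g. a mounted persistent volume

class Config:
    # Same pattern as the diff: environment value, else a writable temp path.
    LOG_DIR = os.environ.get('LOG_DIR', '/tmp/poetry_logs')

print(Config.LOG_DIR)  # -> /data/logs; without the env var: /tmp/poetry_logs
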
@@ -44,20 +45,46 @@ class Config:
 
 config = Config()
 
-# Configure logging
-
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.StreamHandler(),
-        logging.FileHandler(os.path.join(
+# Configure logging with proper error handling
+def setup_logging():
+    logger = logging.getLogger(__name__)
+    logger.setLevel(logging.INFO)
+
+    formatter = logging.Formatter(
+        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    )
+
+    # Always add stdout handler
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
+
+    # Try to set up file handler, but handle permission issues gracefully
+    try:
+        # Attempt to create directory if it doesn't exist
+        os.makedirs(config.LOG_DIR, exist_ok=True)
+
+        log_file = os.path.join(
             config.LOG_DIR,
             f'poetry_generation_{datetime.now().strftime("%Y%m%d")}.log'
-        ))
-    ]
-)
-logger = logging.getLogger(__name__)
+        )
+
+        # Test if we can write to the file
+        with open(log_file, 'a'):
+            pass
+
+        file_handler = logging.FileHandler(log_file)
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+        print(f"Log file created at: {log_file}")
+    except (PermissionError, OSError) as e:
+        print(f"Warning: Could not create log file: {e}")
+        print("Continuing with console logging only.")
+
+    return logger
+
+# Initialize logger
+logger = setup_logging()
 
 # Request models
 class GenerateRequest(BaseModel):
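
The write probe (opening log_file in append mode) is what makes this setup degrade gracefully: where the log directory cannot be created or written, the file handler is skipped and the service keeps logging to stdout. A self-contained sketch of the same probe, assuming a POSIX host (probe_log_dir is a hypothetical helper, not part of main.py):

import os

def probe_log_dir(log_dir: str) -> bool:
    """True if log_dir can be created and written to, mirroring setup_logging()."""
    try:
        os.makedirs(log_dir, exist_ok=True)
        probe = os.path.join(log_dir, '.write_test')
        with open(probe, 'a'):
            pass
        os.remove(probe)
        return True
    except (PermissionError, OSError):
        return False

print(probe_log_dir('/tmp/poetry_logs'))   # True on a typical Linux host
print(probe_log_dir('/proc/poetry_logs'))  # False: /proc is not writable

One design note: setup_logging() attaches handlers unconditionally, so if it were ever called twice the same record would be emitted twice; clearing logger.handlers at the top would guard against that.
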
@@ -242,6 +269,13 @@ class ModelManager:
         try:
             logger.info(f"Initializing model on device: {config.DEVICE}")
 
+            # Check if model file exists
+            if not os.path.exists(config.MODEL_PATH):
+                logger.error(f"Model file not found at {config.MODEL_PATH}")
+                # Try to create directory in case it doesn't exist
+                os.makedirs(os.path.dirname(config.MODEL_PATH), exist_ok=True)
+                return False
+
             await self.tokenization_service.initialize()
             await self._load_and_optimize_model()
 
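
Failing fast when config.MODEL_PATH is missing turns an opaque crash deep inside the model loader into a clear log line and a clean False from initialize(). A reduced sketch of the guard (the path and helper name are illustrative, not from main.py):

import logging
import os

logger = logging.getLogger(__name__)
MODEL_PATH = '/tmp/poetry_model/model.pt'  # hypothetical stand-in for config.MODEL_PATH

def model_file_ready(path: str) -> bool:
    """Same shape as the new check in ModelManager.initialize()."""
    if not os.path.exists(path):
        logger.error(f"Model file not found at {path}")
        # Recreate the parent directory so a later download has a target.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        return False
    return True

print(model_file_ready(MODEL_PATH))  # False until the file is put in place
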
@@ -366,13 +400,15 @@ class ModelManager:
         if config.DEVICE.type == 'cuda':
             # Set optimization flags
             torch.backends.cudnn.benchmark = True
-            torch.backends.cuda.matmul.allow_tf32 = True
+
+            # Enable TF32 precision if available (on A100 GPUs)
+            if hasattr(torch.backends.cuda, 'matmul') and hasattr(torch.backends.cuda.matmul, 'allow_tf32'):
+                torch.backends.cuda.matmul.allow_tf32 = True
 
             # Convert model to TorchScript for faster inference
             try:
-                self.model = torch.jit.trace(
-                    self.model, example_input
-                )
+                # Use a safer approach to TorchScript optimization
+                self.model = torch.jit.script(self.model)
                 logger.info("Model optimized with TorchScript")
             except Exception as e:
                 logger.warning(f"TorchScript optimization failed: {str(e)}")
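
torch.jit.script compiles the module's actual Python control flow, whereas torch.jit.trace records only the operations executed for one example input, silently baking in whichever branch that input happened to take; that is the usual reason script is described as the safer choice. A sketch of the failure mode, assuming a recent PyTorch (Gate is a made-up module, not from main.py):

import torch
import torch.nn as nn

class Gate(nn.Module):
    def forward(self, x):
        # Data-dependent branch: trace() freezes one path, script() keeps both.
        if x.sum() > 0:
            return x + 1
        return x - 1

m = Gate()
scripted = torch.jit.script(m)
traced = torch.jit.trace(m, torch.ones(3))  # warns: condition becomes a constant

neg = -torch.ones(3)
print(scripted(neg))  # tensor([-2., -2., -2.])  correct branch
print(traced(neg))    # tensor([0., 0., 0.])     wrong: the '+1' path was baked in

The hasattr guard around allow_tf32 plays the same defensive role for older PyTorch builds that predate the flag.
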