abhisheksan committed on
Commit 36d6b8e · verified · 1 Parent(s): a22b331

Update main.py

Files changed (1)
  1. main.py +53 -17
main.py CHANGED
@@ -26,7 +26,8 @@ class Config:
     MAX_QUEUE_SIZE = 16 # Maximum number of requests to queue
     QUANTIZE_MODEL = True # Enable quantization for improved performance
     WARMUP_INPUTS = True # Pre-warm the model with sample inputs
-    LOG_DIR = os.path.join(os.getcwd(), 'logs')
+    # Use environment-specific log directory or default to a temp directory
+    LOG_DIR = os.environ.get('LOG_DIR', '/tmp/poetry_logs')
     ENABLE_PROFILING = False # Set to True to enable performance profiling
     REQUEST_TIMEOUT = 30.0 # Timeout for request processing in seconds
 
@@ -44,20 +45,46 @@ class Config:
 
 config = Config()
 
-# Configure logging
-os.makedirs(config.LOG_DIR, exist_ok=True)
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.StreamHandler(sys.stdout),
-        logging.FileHandler(os.path.join(
+# Configure logging with proper error handling
+def setup_logging():
+    logger = logging.getLogger(__name__)
+    logger.setLevel(logging.INFO)
+
+    formatter = logging.Formatter(
+        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    )
+
+    # Always add stdout handler
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
+
+    # Try to set up file handler, but handle permission issues gracefully
+    try:
+        # Attempt to create directory if it doesn't exist
+        os.makedirs(config.LOG_DIR, exist_ok=True)
+
+        log_file = os.path.join(
             config.LOG_DIR,
             f'poetry_generation_{datetime.now().strftime("%Y%m%d")}.log'
-        ))
-    ]
-)
-logger = logging.getLogger(__name__)
+        )
+
+        # Test if we can write to the file
+        with open(log_file, 'a') as f:
+            pass
+
+        file_handler = logging.FileHandler(log_file)
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+        print(f"Log file created at: {log_file}")
+    except (PermissionError, OSError) as e:
+        print(f"Warning: Could not create log file: {e}")
+        print("Continuing with console logging only.")
+
+    return logger
+
+# Initialize logger
+logger = setup_logging()
 
 # Request models
 class GenerateRequest(BaseModel):
@@ -242,6 +269,13 @@ class ModelManager:
         try:
             logger.info(f"Initializing model on device: {config.DEVICE}")
 
+            # Check if model file exists
+            if not os.path.exists(config.MODEL_PATH):
+                logger.error(f"Model file not found at {config.MODEL_PATH}")
+                # Try to create directory in case it doesn't exist
+                os.makedirs(os.path.dirname(config.MODEL_PATH), exist_ok=True)
+                return False
+
             await self.tokenization_service.initialize()
             await self._load_and_optimize_model()
 
@@ -366,13 +400,15 @@ class ModelManager:
         if config.DEVICE.type == 'cuda':
             # Set optimization flags
             torch.backends.cudnn.benchmark = True
-            torch.backends.cuda.matmul.allow_tf32 = True
+
+            # Enable TF32 precision if available (on A100 GPUs)
+            if hasattr(torch.backends.cuda, 'matmul') and hasattr(torch.backends.cuda.matmul, 'allow_tf32'):
+                torch.backends.cuda.matmul.allow_tf32 = True
 
         # Convert model to TorchScript for faster inference
         try:
-            self.model = torch.jit.optimize_for_inference(
-                torch.jit.script(self.model)
-            )
+            # Use a safer approach to TorchScript optimization
+            self.model = torch.jit.script(self.model)
             logger.info("Model optimized with TorchScript")
         except Exception as e:
             logger.warning(f"TorchScript optimization failed: {str(e)}")
 
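Note on the LOG_DIR change: it swaps a hardcoded path under the working directory for an environment override with a /tmp fallback, which suits deployments where the app directory is read-only. A minimal sketch of the resolution rule; the LOG_DIR variable name comes from the diff, the override value and script name are hypothetical:

import os

# Same resolution rule as the new Config.LOG_DIR: the environment
# variable wins, otherwise fall back to a world-writable temp directory.
log_dir = os.environ.get('LOG_DIR', '/tmp/poetry_logs')
print(log_dir)
# $ python resolve_demo.py                     -> /tmp/poetry_logs
# $ LOG_DIR=/data/logs python resolve_demo.py  -> /data/logs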
 
 
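Note on setup_logging(): it degrades to console-only logging when the log directory cannot be created or written to. A condensed, self-contained sketch of the same fallback pattern, assuming a Linux-style /proc for the unwritable demo path; the logger name and file name are illustrative:

import logging
import os
import sys

def build_logger(log_dir: str) -> logging.Logger:
    # Console handler is unconditional; the file handler is only attached
    # after proving the directory and file are writable, mirroring the
    # open(log_file, 'a') probe in the commit.
    logger = logging.getLogger('fallback-demo')
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(formatter)
    logger.addHandler(console)
    try:
        os.makedirs(log_dir, exist_ok=True)
        path = os.path.join(log_dir, 'demo.log')
        with open(path, 'a'):
            pass  # writability probe; raises before logging is misconfigured
        file_handler = logging.FileHandler(path)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
    except (PermissionError, OSError) as exc:
        logger.warning('File logging disabled: %s', exc)
    return logger

log = build_logger('/proc/demo-logs')  # mkdir under /proc fails -> console only
log.info('still logs to stdout')

One difference from the commit: the sketch routes the fallback warning through the logger instead of print, so it carries the configured format; either works, since this runs once at startup.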
 
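Note on the MODEL_PATH guard: it fails fast instead of letting model loading raise deep inside startup. One edge worth flagging: os.path.dirname() returns an empty string for a bare filename, and os.makedirs('') raises FileNotFoundError. A sketch of the guard with that case handled; the function name and demo path are illustrative:

import os

def model_file_ready(model_path: str) -> bool:
    # Illustrative re-statement of the commit's startup check.
    if os.path.exists(model_path):
        return True
    # Default to '.' so a bare filename does not make os.makedirs('') raise.
    parent = os.path.dirname(model_path) or '.'
    os.makedirs(parent, exist_ok=True)  # leave a place to drop the file
    return False

print(model_file_ready('models/poetry.pt'))  # hypothetical path -> False, creates models/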
 
 