Spaces:

Shilpaj
/

MnistStudio

Sleeping

App Files Files Community

Shilpaj committed on Nov 17, 2024

Commit

30d27e9

1 Parent(s): f9b762f

Feat: Completed logic for multiple models training and comparison

Browse files

Files changed (5) hide show

app.py +62 -64
scripts/training/train.py +266 -8
static/js/train.js +6 -6
static/js/train_compare.js +159 -6
templates/train_compare.html +110 -40

app.py CHANGED Viewed

@@ -7,10 +7,12 @@ from typing import List, Optional
 import uvicorn
 import torch
 from scripts.model import Net
-from scripts.training.train import train
 from pathlib import Path
 from fastapi import BackgroundTasks
 import warnings
 warnings.filterwarnings("ignore", category=UserWarning, module="torchvision.transforms")
@@ -83,7 +85,9 @@ async def train_model(config: TrainingConfig, background_tasks: BackgroundTasks)
 async def websocket_endpoint(websocket: WebSocket):
     await websocket.accept()
     try:
         config_data = await websocket.receive_json()
         model = Net(
             kernels=[
@@ -93,28 +97,20 @@ async def websocket_endpoint(websocket: WebSocket):
             ]
         )
-        from scripts.training.config import NetworkConfig
-        config = NetworkConfig()
-        config.update(
-            block1=config_data['block1'],
-            block2=config_data['block2'],
-            block3=config_data['block3'],
-            optimizer=config_data['optimizer'],
-            batch_size=config_data['batch_size'],
-            epochs=config_data['epochs']
-        )
         print(f"Starting training with config: {config_data}")
         try:
-            # Pass "single" as model_type for single model training
             await train(model, config, websocket, model_type="single")
-            await websocket.send_json({
-                "type": "training_complete",
-                "data": {
-                    "message": "Training completed successfully!"
-                }
-            })
         except Exception as e:
             print(f"Training error: {str(e)}")
             await websocket.send_json({
@@ -128,68 +124,70 @@ async def websocket_endpoint(websocket: WebSocket):
         print("WebSocket disconnected")
     except Exception as e:
         print(f"WebSocket error: {str(e)}")
     finally:
         print("WebSocket connection closed")
 @app.websocket("/ws/compare")
-async def websocket_compare_endpoint(websocket: WebSocket):
-    await websocket.accept()
     try:
         data = await websocket.receive_json()
-        if data.get("type") == "start_comparison":
-            from scripts.training.config import NetworkConfig
-            # Create and train both models
-            model1_config = NetworkConfig()
-            model2_config = NetworkConfig()
-            # Update configs with received data
-            model1_config.update(**data["model1"])
-            model2_config.update(**data["model2"])
-            # Create models with respective configurations
-            model1 = Net(
-                kernels=[
-                    model1_config.block1,
-                    model1_config.block2,
-                    model1_config.block3
-                ]
-            )
-            model2 = Net(
-                kernels=[
-                    model2_config.block1,
-                    model2_config.block2,
-                    model2_config.block3
-                ]
-            )
-            # Train both models with appropriate model_type
-            try:
-                await train(model1, model1_config, websocket, model_type="model_1")
-                await train(model2, model2_config, websocket, model_type="model_2")
                 await websocket.send_json({
-                    "type": "comparison_complete",
-                    "data": {
-                        "message": "Training completed successfully!"
-                    }
                 })
             except Exception as e:
-                print(f"Training error: {str(e)}")
                 await websocket.send_json({
-                    "type": "training_error",
-                    "data": {
-                        "message": f"Training failed: {str(e)}"
-                    }
                 })
     except WebSocketDisconnect:
         print("WebSocket disconnected")
     except Exception as e:
-        print(f"WebSocket error: {str(e)}")
     finally:
-        print("WebSocket connection closed")
 # @app.post("/api/train_single")
 # async def train_single_model(config: TrainingConfig):

 import uvicorn
 import torch
 from scripts.model import Net
+from scripts.training.train import train, start_comparison_training
 from pathlib import Path
 from fastapi import BackgroundTasks
 import warnings
+import asyncio
+import json
 warnings.filterwarnings("ignore", category=UserWarning, module="torchvision.transforms")
 async def websocket_endpoint(websocket: WebSocket):
     await websocket.accept()
     try:
+        print("WebSocket connection accepted for single model training")
         config_data = await websocket.receive_json()
+        print(f"Received config data: {config_data}")
         model = Net(
             kernels=[
             ]
         )
+        # Create TrainingConfig object for single model using **kwargs
+        config = TrainingConfig(**{
+            'block1': config_data['block1'],
+            'block2': config_data['block2'],
+            'block3': config_data['block3'],
+            'optimizer': config_data['optimizer'],
+            'batch_size': config_data['batch_size'],
+            'epochs': config_data['epochs']
+        })
         print(f"Starting training with config: {config_data}")
         try:
             await train(model, config, websocket, model_type="single")
         except Exception as e:
             print(f"Training error: {str(e)}")
             await websocket.send_json({
         print("WebSocket disconnected")
     except Exception as e:
         print(f"WebSocket error: {str(e)}")
+        await websocket.send_json({
+            "type": "training_error",
+            "data": {
+                "message": f"WebSocket error: {str(e)}"
+            }
+        })
     finally:
         print("WebSocket connection closed")
 @app.websocket("/ws/compare")
+async def websocket_endpoint(websocket: WebSocket):
+    print("\n=== New WebSocket Connection ===")
+    print("New WebSocket connection attempt")
     try:
+        await websocket.accept()
+        print("WebSocket connection accepted")
+        print("Waiting for initial message...")
         data = await websocket.receive_json()
+        print(f"Received initial message: {data}")
+        if 'action' not in data:
+            print("Error: Missing 'action' in message")
+            await websocket.send_json({
+                'status': 'error',
+                'message': 'Missing action in request'
+            })
+            return
+        if data['action'] == 'start_training':
+            if 'parameters' not in data:
+                print("Error: Missing 'parameters' in message")
                 await websocket.send_json({
+                    'status': 'error',
+                    'message': 'Missing parameters in request'
                 })
+                return
+            print("Starting training task")
+            try:
+                training_task = asyncio.create_task(start_comparison_training(
+                    websocket,
+                    data['parameters']
+                ))
+                print("Training task created, awaiting completion...")
+                await training_task
+                print("Training task completed")
             except Exception as e:
+                print(f"Error during training task: {str(e)}")
                 await websocket.send_json({
+                    'status': 'error',
+                    'message': f'Training error: {str(e)}'
                 })
+        else:
+            print(f"Unknown action received: {data['action']}")
     except WebSocketDisconnect:
         print("WebSocket disconnected")
+    except json.JSONDecodeError as e:
+        print(f"JSON decode error: {str(e)}")
     except Exception as e:
+        print(f"Unexpected error in websocket handler: {str(e)}")
     finally:
+        print("=== WebSocket Connection Closed ===\n")
 # @app.post("/api/train_single")
 # async def train_single_model(config: TrainingConfig):

scripts/training/train.py CHANGED Viewed

@@ -12,6 +12,18 @@ import urllib.request
 import shutil
 from tqdm import tqdm
 import asyncio
 def generate_model_filename(config, model_type="single"):
     """Generate a filename based on model configuration
@@ -185,7 +197,6 @@ async def train(model, config, websocket=None, model_type="single"):
             correct = 0
             total = 0
-            # Create progress bar for each epoch
             progress_bar = tqdm(
                 train_loader,
                 desc=f"Epoch {epoch+1}/{config.epochs}",
@@ -211,12 +222,6 @@ async def train(model, config, websocket=None, model_type="single"):
                 current_loss = total_loss / (batch_idx + 1)
                 current_acc = 100. * correct / total
-                # Update progress bar description
-                progress_bar.set_postfix({
-                    'loss': f'{current_loss:.4f}',
-                    'acc': f'{current_acc:.2f}%'
-                })
                 # Send training update through websocket
                 if websocket:
                     try:
@@ -226,7 +231,8 @@ async def train(model, config, websocket=None, model_type="single"):
                             'data': {
                                 'step': step,
                                 'train_loss': current_loss,
-                                'train_acc': current_acc
                             }
                         })
                     except Exception as e:
@@ -284,8 +290,260 @@ async def train(model, config, websocket=None, model_type="single"):
     except Exception as e:
         print(f"\nError during training: {e}")
         raise e
     print("\nTraining completed!")
     print(f"Best validation accuracy: {best_val_acc:.2f}%")
     return None

 import shutil
 from tqdm import tqdm
 import asyncio
+from fastapi import WebSocket
+import json
+from scripts.model import Net
class TrainingConfig:
    """Container for the settings of a single training run.

    The six required settings are exposed as plain attributes: ``block1``,
    ``block2``, ``block3``, ``optimizer``, ``batch_size`` and ``epochs``.

    Args:
        params_dict: Optional mapping holding the settings.
        **overrides: The same settings given as keyword arguments; these take
            precedence over entries in ``params_dict``.

    Raises:
        KeyError: If one or more required settings are missing. The message
            lists every missing name rather than only the first one.
    """

    REQUIRED_FIELDS = (
        'block1',
        'block2',
        'block3',
        'optimizer',
        'batch_size',
        'epochs',
    )

    def __init__(self, params_dict=None, **overrides):
        # Copy so the caller's mapping is never mutated by the merge below.
        settings = dict(params_dict) if params_dict else {}
        settings.update(overrides)

        missing = [name for name in self.REQUIRED_FIELDS if name not in settings]
        if missing:
            raise KeyError(f"Missing training parameter(s): {', '.join(missing)}")

        for name in self.REQUIRED_FIELDS:
            setattr(self, name, settings[name])

    def __repr__(self):
        values = ", ".join(
            f"{name}={getattr(self, name)!r}" for name in self.REQUIRED_FIELDS
        )
        return f"{type(self).__name__}({values})"
 def generate_model_filename(config, model_type="single"):
     """Generate a filename based on model configuration
             correct = 0
             total = 0
             progress_bar = tqdm(
                 train_loader,
                 desc=f"Epoch {epoch+1}/{config.epochs}",
                 current_loss = total_loss / (batch_idx + 1)
                 current_acc = 100. * correct / total
                 # Send training update through websocket
                 if websocket:
                     try:
                             'data': {
                                 'step': step,
                                 'train_loss': current_loss,
+                                'train_acc': current_acc,
+                                'epoch': epoch
                             }
                         })
                     except Exception as e:
     except Exception as e:
         print(f"\nError during training: {e}")
+        if websocket:
+            await websocket.send_json({
+                'type': 'training_error',
+                'data': {
+                    'message': str(e)
+                }
+            })
         raise e
     print("\nTraining completed!")
     print(f"Best validation accuracy: {best_val_acc:.2f}%")
+    if websocket:
+        await websocket.send_json({
+            'type': 'training_complete',
+            'data': {
+                'message': 'Training completed successfully!',
+                'best_val_acc': best_val_acc
+            }
+        })
     return None
def initialize_datasets(batch_size):
    """Build the MNIST train/test datasets and wrap each in a DataLoader.

    Args:
        batch_size: Batch size passed to both loaders.

    Returns:
        Tuple ``(train_dataset, test_dataset, train_loader, test_loader)``.
    """
    print("Preparing dataset...")
    # Runs before any file below is opened (presumably fetches the raw MNIST
    # files when they are not on disk yet).
    download_and_extract_mnist_data()

    # One normalisation pipeline shared by both splits.
    normalize = transforms.Compose([
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    train_dataset = CustomMNISTDataset(
        "data/MNIST/raw/train-images-idx3-ubyte",
        "data/MNIST/raw/train-labels-idx1-ubyte",
        transform=normalize,
    )
    test_dataset = CustomMNISTDataset(
        "data/MNIST/raw/t10k-images-idx3-ubyte",
        "data/MNIST/raw/t10k-labels-idx1-ubyte",
        transform=normalize,
    )

    # Only the training split is shuffled.
    train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

    return train_dataset, test_dataset, train_loader, test_loader
def _make_comparison_optimizer(model, optimizer_name):
    """Return Adam for ``'adam'`` (case-insensitive), otherwise SGD(lr=0.01, momentum=0.9)."""
    if optimizer_name.lower() == 'adam':
        return optim.Adam(model.parameters())
    return optim.SGD(model.parameters(), lr=0.01, momentum=0.9)


def _evaluate_comparison_model(model, loader, criterion, device):
    """Run one gradient-free pass over ``loader``; return (mean batch loss, accuracy in %)."""
    model.eval()
    loss_sum = 0.0
    correct = 0
    seen = 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss_sum += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            seen += target.size(0)
    return loss_sum / len(loader), 100. * correct / seen


async def start_comparison_training(websocket: WebSocket, parameters: dict):
    """Train model A and then model B, streaming progress over ``websocket``.

    Messages sent to the client:
        * per batch: ``{'status': 'training', 'model': 'A'|'B', 'metrics':
          {'iteration', 'total_iterations', 'loss', 'accuracy'}, 'epoch',
          'batch_size', 'iterations_per_epoch'}``
        * on success: ``{'status': 'complete', 'message', 'model_a_acc',
          'model_b_acc'}``
        * on failure: ``{'status': 'error', 'message'}``

    After every epoch the model is evaluated on the test split, and its
    weights are saved under ``scripts/training/models`` whenever the
    validation accuracy improves.

    Args:
        websocket: Open connection used to report progress.
        parameters: Must contain non-empty ``'model_params'`` (with
            ``'model_a'`` and ``'model_b'`` entries, each holding ``block1``,
            ``block2``, ``block3``, ``optimizer``, ``batch_size``, ``epochs``)
            and non-empty ``'dataset_params'``.

    Returns:
        None. Errors are reported to the client and logged, not raised.
    """
    model_specs = [('model_a', 'A'), ('model_b', 'B')]
    required_fields = ('block1', 'block2', 'block3', 'optimizer', 'batch_size', 'epochs')

    print("\n=== Starting Comparison Training ===")
    print(f"Received parameters: {json.dumps(parameters, indent=2)}")

    try:
        # Create models directory if it doesn't exist
        models_dir = Path("scripts/training/models")
        models_dir.mkdir(parents=True, exist_ok=True)

        # Validate parameters
        if not parameters.get('model_params'):
            print("Error: Missing model parameters")
            raise ValueError("Missing model parameters")
        if not parameters.get('dataset_params'):
            print("Error: Missing dataset parameters")
            raise ValueError("Missing dataset parameters")

        # Check both configurations up front so a bad model B config is
        # reported before any time is spent training model A.
        for model_key, _ in model_specs:
            candidate = parameters['model_params'].get(model_key)
            if not isinstance(candidate, dict):
                raise ValueError(f"Missing parameters for {model_key}")
            missing = [name for name in required_fields if name not in candidate]
            if missing:
                raise ValueError(f"Missing {', '.join(missing)} for {model_key}")
            for name in ('batch_size', 'epochs'):
                value = candidate[name]
                if isinstance(value, bool) or not isinstance(value, int) or value < 1:
                    raise ValueError(
                        f"{model_key}.{name} must be a positive integer, got {value!r}"
                    )

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        criterion = nn.CrossEntropyLoss()

        # Same preparation step initialize_datasets() performs; without it the
        # dataset files below may not exist yet on a fresh checkout.
        download_and_extract_mnist_data()

        # Both splits are independent of the per-model settings: build them once.
        transform = transforms.Compose([transforms.Normalize((0.1307,), (0.3081,))])
        train_dataset = CustomMNISTDataset(
            "data/MNIST/raw/train-images-idx3-ubyte",
            "data/MNIST/raw/train-labels-idx1-ubyte",
            transform=transform
        )
        test_dataset = CustomMNISTDataset(
            "data/MNIST/raw/t10k-images-idx3-ubyte",
            "data/MNIST/raw/t10k-labels-idx1-ubyte",
            transform=transform
        )

        # Dictionary to store best accuracies
        best_accuracies = {}

        for model_key, model_letter in model_specs:
            print(f"\n{'='*50}")
            print(f"Training Model {model_letter}")
            print(f"{'='*50}")

            model_params = parameters['model_params'][model_key]
            batch_size = model_params['batch_size']

            try:
                # Loaders use the model-specific batch size.
                train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
                test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

                # len(loader) counts the final partial batch too, so the totals
                # reported to the client match the number of updates sent.
                iterations_per_epoch = len(train_loader)
                total_iterations = iterations_per_epoch * model_params['epochs']

                # Print configuration details
                print("\nModel Configuration:")
                print(f"Architecture: {model_params['block1']}-{model_params['block2']}-{model_params['block3']}")
                print(f"Optimizer: {model_params['optimizer']}")
                print(f"Batch Size: {model_params['batch_size']}")
                print(f"Epochs: {model_params['epochs']}")
                print(f"Iterations per epoch: {iterations_per_epoch:,}")
                print(f"Total iterations: {total_iterations:,}")

                print(f"\nDataset Information:")
                print(f"Training samples: {len(train_dataset):,}")
                print(f"Test samples: {len(test_dataset):,}")
                print(f"Steps per epoch: {len(train_loader):,}")

                # Initialize model and move to device
                model = Net(kernels=[
                    model_params['block1'],
                    model_params['block2'],
                    model_params['block3']
                ]).to(device)

                # Print model parameters
                total_params = sum(p.numel() for p in model.parameters())
                trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
                print(f"\nModel Parameters:")
                print(f"Total parameters: {total_params:,}")
                print(f"Trainable parameters: {trainable_params:,}")

                # Any optimizer name other than 'adam' falls back to SGD.
                optimizer = _make_comparison_optimizer(model, model_params['optimizer'])

                current_iteration = 0
                best_acc = 0  # Track best accuracy for model saving

                for epoch in range(model_params['epochs']):
                    model.train()
                    total_loss = 0
                    correct = 0
                    total = 0

                    progress_bar = tqdm(
                        train_loader,
                        desc=f"Epoch {epoch+1}/{model_params['epochs']}",
                        unit='batch',
                        leave=True,
                        ncols=100
                    )

                    for batch_idx, (data, target) in enumerate(progress_bar):
                        data, target = data.to(device), target.to(device)
                        optimizer.zero_grad()
                        output = model(data)
                        loss = criterion(output, target)
                        loss.backward()
                        optimizer.step()

                        # Running accuracy/loss over the epoch so far
                        pred = output.argmax(dim=1, keepdim=True)
                        correct += pred.eq(target.view_as(pred)).sum().item()
                        total += target.size(0)
                        total_loss += loss.item()

                        current_loss = total_loss / (batch_idx + 1)
                        current_acc = 100. * correct / total

                        progress_bar.set_postfix({
                            'loss': f'{current_loss:.4f}',
                            'acc': f'{current_acc:.2f}%'
                        })

                        # Send comparison-specific training update
                        current_iteration += 1
                        await websocket.send_json({
                            'status': 'training',
                            'model': model_letter,
                            'metrics': {
                                'iteration': current_iteration,
                                'total_iterations': total_iterations,
                                'loss': current_loss,
                                'accuracy': current_acc
                            },
                            'epoch': epoch,
                            'batch_size': batch_size,
                            'iterations_per_epoch': iterations_per_epoch
                        })

                    # Print epoch summary
                    print(f"\nEpoch {epoch+1} Summary:")
                    print(f"Average Loss: {current_loss:.4f}")
                    print(f"Accuracy: {current_acc:.2f}%")

                    # Validation phase at the end of each epoch
                    print("\nRunning validation...")
                    val_loss, val_acc = _evaluate_comparison_model(
                        model, test_loader, criterion, device
                    )
                    print(f"Validation Loss: {val_loss:.4f}")
                    print(f"Validation Accuracy: {val_acc:.2f}%")

                    # Save model if it's the best so far.
                    # NOTE(review): the timestamp makes every improvement a new
                    # file, so earlier checkpoints of the same run are kept.
                    if val_acc > best_acc:
                        best_acc = val_acc
                        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                        model_filename = f"{model_key}_arch_{model_params['block1']}_{model_params['block2']}_{model_params['block3']}_opt_{model_params['optimizer'].lower()}_batch_{model_params['batch_size']}_{timestamp}.pth"
                        model_path = models_dir / model_filename
                        print(f"\nSaving Model {model_letter} with accuracy {val_acc:.2f}% as: {model_filename}")
                        torch.save(model.state_dict(), model_path)

                print(f"\nModel {model_letter} training completed")
                print(f"Best validation accuracy: {best_acc:.2f}%")

                # Save best accuracy for this model
                best_accuracies[model_key] = best_acc

            except Exception as e:
                print(f"Error training Model {model_letter}: {str(e)}")
                raise

        print("\nBoth models trained successfully")
        await websocket.send_json({
            'status': 'complete',
            'message': 'Training completed for both models',
            'model_a_acc': best_accuracies.get('model_a'),
            'model_b_acc': best_accuracies.get('model_b')
        })

    except Exception as e:
        error_msg = f"Error in comparison training: {str(e)}"
        print(error_msg)
        try:
            await websocket.send_json({
                'status': 'error',
                'message': error_msg
            })
        except Exception as send_error:
            # The failure may itself be a closed connection; reporting it must
            # not raise a second exception out of this handler.
            print(f"Could not report error to client: {send_error}")
    finally:
        print("=== Comparison Training Ended ===\n")

static/js/train.js CHANGED Viewed

@@ -169,24 +169,24 @@ async function compareModels() {
 function initializeComparisonCharts() {
     const lossData = [{
-        name: 'Model 1 Loss',
         x: [],
         y: [],
         type: 'scatter'
     }, {
-        name: 'Model 2 Loss',
         x: [],
         y: [],
         type: 'scatter'
     }];
     const accuracyData = [{
-        name: 'Model 1 Accuracy',
         x: [],
         y: [],
         type: 'scatter'
     }, {
-        name: 'Model 2 Accuracy',
         x: [],
         y: [],
         type: 'scatter'
@@ -209,13 +209,13 @@ function displayComparisonResults(data) {
     const logsDiv = document.getElementById('comparison-logs');
     logsDiv.innerHTML = `
         <div class="comparison-model">
-            <h4>Model 1</h4>
             <p>Final Loss: ${data.model1_results.history.train_loss.slice(-1)[0].toFixed(4)}</p>
             <p>Final Accuracy: ${data.model1_results.history.train_acc.slice(-1)[0].toFixed(2)}%</p>
             <p>Model Name: ${data.model1_results.model_name}</p>
         </div>
         <div class="comparison-model">
-            <h4>Model 2</h4>
             <p>Final Loss: ${data.model2_results.history.train_loss.slice(-1)[0].toFixed(4)}</p>
             <p>Final Accuracy: ${data.model2_results.history.train_acc.slice(-1)[0].toFixed(2)}%</p>
             <p>Model Name: ${data.model2_results.model_name}</p>

 function initializeComparisonCharts() {
     const lossData = [{
+        name: 'Model A Loss',
         x: [],
         y: [],
         type: 'scatter'
     }, {
+        name: 'Model B Loss',
         x: [],
         y: [],
         type: 'scatter'
     }];
     const accuracyData = [{
+        name: 'Model A Accuracy',
         x: [],
         y: [],
         type: 'scatter'
     }, {
+        name: 'Model B Accuracy',
         x: [],
         y: [],
         type: 'scatter'
     const logsDiv = document.getElementById('comparison-logs');
     logsDiv.innerHTML = `
         <div class="comparison-model">
+            <h4>Model A</h4>
             <p>Final Loss: ${data.model1_results.history.train_loss.slice(-1)[0].toFixed(4)}</p>
             <p>Final Accuracy: ${data.model1_results.history.train_acc.slice(-1)[0].toFixed(2)}%</p>
             <p>Model Name: ${data.model1_results.model_name}</p>
         </div>
         <div class="comparison-model">
+            <h4>Model B</h4>
             <p>Final Loss: ${data.model2_results.history.train_loss.slice(-1)[0].toFixed(4)}</p>
             <p>Final Accuracy: ${data.model2_results.history.train_acc.slice(-1)[0].toFixed(2)}%</p>
             <p>Model Name: ${data.model2_results.model_name}</p>

static/js/train_compare.js CHANGED Viewed

@@ -2,24 +2,24 @@ let ws;
 function initializeComparisonCharts() {
     const lossData = [{
-        name: 'Model 1 Loss',
         x: [],
         y: [],
         type: 'scatter'
     }, {
-        name: 'Model 2 Loss',
         x: [],
         y: [],
         type: 'scatter'
     }];
     const accuracyData = [{
-        name: 'Model 1 Accuracy',
         x: [],
         y: [],
         type: 'scatter'
     }, {
-        name: 'Model 2 Accuracy',
         x: [],
         y: [],
         type: 'scatter'
@@ -90,16 +90,169 @@ function displayComparisonResults(data) {
     const logsDiv = document.getElementById('comparison-logs');
     logsDiv.innerHTML = `
         <div class="comparison-model">
-            <h4>Model 1</h4>
             <p>Final Loss: ${data.model1_results.history.train_loss.slice(-1)[0].toFixed(4)}</p>
             <p>Final Accuracy: ${data.model1_results.history.train_acc.slice(-1)[0].toFixed(2)}%</p>
             <p>Model Name: ${data.model1_results.model_name}</p>
         </div>
         <div class="comparison-model">
-            <h4>Model 2</h4>
             <p>Final Loss: ${data.model2_results.history.train_loss.slice(-1)[0].toFixed(4)}</p>
             <p>Final Accuracy: ${data.model2_results.history.train_acc.slice(-1)[0].toFixed(2)}%</p>
             <p>Model Name: ${data.model2_results.model_name}</p>
         </div>
     `;
 }

 function initializeComparisonCharts() {
     const lossData = [{
+        name: 'Model A Loss',
         x: [],
         y: [],
         type: 'scatter'
     }, {
+        name: 'Model B Loss',
         x: [],
         y: [],
         type: 'scatter'
     }];
     const accuracyData = [{
+        name: 'Model A Accuracy',
         x: [],
         y: [],
         type: 'scatter'
     }, {
+        name: 'Model B Accuracy',
         x: [],
         y: [],
         type: 'scatter'
     const logsDiv = document.getElementById('comparison-logs');
     logsDiv.innerHTML = `
         <div class="comparison-model">
+            <h4>Model A</h4>
             <p>Final Loss: ${data.model1_results.history.train_loss.slice(-1)[0].toFixed(4)}</p>
             <p>Final Accuracy: ${data.model1_results.history.train_acc.slice(-1)[0].toFixed(2)}%</p>
             <p>Model Name: ${data.model1_results.model_name}</p>
         </div>
         <div class="comparison-model">
+            <h4>Model B</h4>
             <p>Final Loss: ${data.model2_results.history.train_loss.slice(-1)[0].toFixed(4)}</p>
             <p>Final Accuracy: ${data.model2_results.history.train_acc.slice(-1)[0].toFixed(2)}%</p>
             <p>Model Name: ${data.model2_results.model_name}</p>
         </div>
     `;
+}
+// Add these helper functions to get the parameters
/**
 * Read both models' settings from the comparison form.
 *
 * Form fields are looked up by id as `<prefix>_<suffix>`, where the prefix is
 * `model1` (sent as `model_a`) or `model2` (sent as `model_b`).
 *
 * @returns {{model_a: Object, model_b: Object}} Each entry holds block1,
 *     block2, block3, batch_size, epochs (positive integers) and optimizer
 *     (non-empty string).
 * @throws {Error} If a field is missing from the page or holds an invalid value.
 */
function getModelParameters() {
    // Payload key -> id prefix of that model's form fields.
    const formPrefixes = { model_a: 'model1', model_b: 'model2' };
    // Payload key -> id suffix of the integer-valued inputs.
    const integerFields = {
        block1: 'kernel1',
        block2: 'kernel2',
        block3: 'kernel3',
        batch_size: 'batch_size',
        epochs: 'epochs'
    };

    const readValue = (elementId) => {
        const element = document.getElementById(elementId);
        if (!element) {
            throw new Error(`Missing form field: ${elementId}`);
        }
        return element.value;
    };

    try {
        const params = {};
        for (const [model, prefix] of Object.entries(formPrefixes)) {
            const modelParams = {};

            for (const [key, suffix] of Object.entries(integerFields)) {
                const value = parseInt(readValue(`${prefix}_${suffix}`), 10);
                // Zero or negative sizes/counts cannot be trained with.
                if (!Number.isInteger(value) || value < 1) {
                    throw new Error(`Invalid value for ${model} ${key}: ${value}`);
                }
                modelParams[key] = value;
            }

            const optimizer = readValue(`${prefix}_optimizer`);
            if (!optimizer) {
                throw new Error(`Invalid value for ${model} optimizer: ${optimizer}`);
            }
            modelParams.optimizer = optimizer;

            params[model] = modelParams;
        }

        console.log('Collected and validated model parameters:', params);
        return params;
    } catch (error) {
        console.error('Error in getModelParameters:', error);
        throw error;
    }
}
/**
 * Build the dataset section of the training request.
 *
 * The batch size is taken from model 1's batch-size input; shuffling is
 * always requested.
 *
 * @returns {{batch_size: number, shuffle: boolean}}
 */
function getDatasetParameters() {
    const batchSizeInput = document.getElementById('model1_batch_size');
    const batchSize = parseInt(batchSizeInput.value);
    return { batch_size: batchSize, shuffle: true };
}
+// Update the WebSocket event listener
// Start a comparison run: validate the form, build the request, then open the
// WebSocket and stream progress updates into the charts.
document.getElementById('startComparisonBtn').addEventListener('click', function(event) {
    console.log('Start Comparison button clicked');
    const startButton = event.currentTarget;

    // Validate form inputs before proceeding
    const formInputs = document.querySelectorAll('input[type="number"], select');
    let isValid = true;
    const formValues = {};

    formInputs.forEach(input => {
        console.log(`Checking input ${input.id}: ${input.value}`);
        formValues[input.id] = input.value;
        if (!input.value) {
            console.error(`Missing value for ${input.id}`);
            isValid = false;
        }
    });

    console.log('Form values:', formValues);

    if (!isValid) {
        alert('Please fill in all required fields');
        return;
    }

    // Build the request BEFORE connecting: if the parameters are invalid no
    // socket is opened, so the server is never left waiting for a message.
    let message;
    try {
        message = {
            action: 'start_training',
            parameters: {
                model_params: getModelParameters(),
                dataset_params: getDatasetParameters()
            }
        };
    } catch (error) {
        alert(`Invalid training parameters: ${error.message}`);
        return;
    }

    // Show comparison progress section
    document.getElementById('comparison-progress').classList.remove('hidden');
    initializeComparisonCharts();
    console.log('Initialized comparison charts');

    console.log('Attempting WebSocket connection...');
    // Pages served over HTTPS may only open secure WebSockets.
    const scheme = window.location.protocol === 'https:' ? 'wss' : 'ws';
    const socket = new WebSocket(`${scheme}://${window.location.host}/ws/compare`);

    // One run at a time: re-enabled when the connection closes.
    startButton.disabled = true;

    socket.onopen = function() {
        console.log('WebSocket connection established');
        console.log('Preparing to send message:', JSON.stringify(message, null, 2));
        // The socket is already OPEN inside onopen, so no delay is needed.
        try {
            socket.send(JSON.stringify(message));
            console.log('Message sent successfully');
        } catch (error) {
            console.error('Error sending message:', error);
            alert('Error sending training parameters. Please check console for details.');
        }
    };

    socket.onmessage = function(messageEvent) {
        console.log('Received WebSocket message:', messageEvent.data);
        try {
            const data = JSON.parse(messageEvent.data);
            console.log('Parsed message data:', data);
            updateTrainingProgress(data);
        } catch (error) {
            console.error('Error processing message:', error);
        }
    };

    socket.onerror = function(error) {
        console.error('WebSocket error:', error);
        alert('Connection error occurred. Please check console for details.');
    };

    socket.onclose = function(closeEvent) {
        console.log('WebSocket connection closed. Code:', closeEvent.code, 'Reason:', closeEvent.reason);
        startButton.disabled = false;
    };
});
+// Add the updateTrainingProgress function
/**
 * Apply one server message to the comparison UI.
 *
 * Handles three message shapes, selected by `data.status`:
 *   - 'training': appends the loss/accuracy point to the trace of the model
 *     named in `data.model` ('A' -> trace 0, anything else -> trace 1).
 *   - 'complete': shows the final summary.
 *   - 'error': logs and alerts `data.message`.
 *
 * @param {Object} data Parsed JSON message received from the server.
 */
function updateTrainingProgress(data) {
    const progressText = document.getElementById('training-progress-text');

    if (data.status === 'training') {
        const traceIndex = data.model === 'A' ? 0 : 1;
        // The traces are created with an explicit (empty) x array, so x has to
        // grow together with y or the new points have no x coordinate.
        const step = data.metrics.iteration;

        // Update loss plot
        Plotly.extendTraces('comparison-loss-plot', {
            x: [[step]],
            y: [[data.metrics.loss]]
        }, [traceIndex]);

        // Update accuracy plot
        Plotly.extendTraces('comparison-accuracy-plot', {
            x: [[step]],
            y: [[data.metrics.accuracy]]
        }, [traceIndex]);

        // Update progress text
        if (progressText) {
            progressText.textContent = `Training ${data.model === 'A' ? 'Model A' : 'Model B'} - Epoch ${data.epoch + 1}`;
        }
    } else if (data.status === 'complete') {
        if (progressText) {
            progressText.textContent = 'Training Complete!';
        }

        if (data.metrics) {
            // Full result payload, when the server provides one.
            displayComparisonResults(data.metrics);
        } else {
            // The completion message may carry only the best validation
            // accuracies (model_a_acc / model_b_acc); show those instead.
            const formatAcc = (acc) => (typeof acc === 'number' ? `${acc.toFixed(2)}%` : 'n/a');
            const logsDiv = document.getElementById('comparison-logs');
            if (logsDiv) {
                logsDiv.innerHTML = `
        <div class="comparison-model">
            <h4>Model A</h4>
            <p>Best Validation Accuracy: ${formatAcc(data.model_a_acc)}</p>
        </div>
        <div class="comparison-model">
            <h4>Model B</h4>
            <p>Best Validation Accuracy: ${formatAcc(data.model_b_acc)}</p>
        </div>
    `;
            }
        }
    } else if (data.status === 'error') {
        // Handle error
        console.error('Training error:', data.message);
        alert(`Training error: ${data.message}`);
    }
}

templates/train_compare.html CHANGED Viewed

@@ -11,9 +11,9 @@
     <div class="container">
         <h1>Compare Models</h1>
         <div class="models-grid">
-            <!-- Model 1 Configuration -->
             <div class="model-config">
-                <h3>Model 1</h3>
                 <div class="network-config">
                     <h4>Network Architecture</h4>
                     <div class="block-config">
@@ -78,9 +78,9 @@
                 </div>
             </div>
-            <!-- Model 2 Configuration -->
             <div class="model-config">
-                <h3>Model 2</h3>
                 <div class="network-config">
                     <h4>Network Architecture</h4>
                     <div class="block-config">
@@ -157,6 +157,18 @@
             <div id="lossChart"></div>
             <div id="accuracyChart"></div>
         </div>
     </div>
     <style>
@@ -278,6 +290,28 @@
         .config-item .section-title {
             margin-bottom: 5px;
         }
     </style>
     <script>
@@ -292,13 +326,13 @@
                 {
                     x: [],
                     y: [],
-                    name: 'Model 1 Training Loss',
                     type: 'scatter'
                 },
                 {
                     x: [],
                     y: [],
-                    name: 'Model 2 Training Loss',
                     type: 'scatter'
                 }
             ];
@@ -320,13 +354,13 @@
                 {
                     x: [],
                     y: [],
-                    name: 'Model 1 Training Accuracy',
                     type: 'scatter'
                 },
                 {
                     x: [],
                     y: [],
-                    name: 'Model 2 Training Accuracy',
                     type: 'scatter'
                 }
             ];
@@ -375,55 +409,91 @@
             // Setup WebSocket connection
             ws = new WebSocket(`ws://${window.location.host}/ws/compare`);
             ws.onmessage = function(event) {
                 const data = JSON.parse(event.data);
-                if (data.type === 'training_update') {
-                    const modelIndex = data.data.model_id - 1;  // 0 for model1, 1 for model2
-                    // Update training metrics
                     Plotly.extendTraces('lossChart', {
-                        x: [[data.data.step]],
-                        y: [[data.data.train_loss]]
                     }, [modelIndex]);
                     Plotly.extendTraces('accuracyChart', {
-                        x: [[data.data.step]],
-                        y: [[data.data.train_acc]]
                     }, [modelIndex]);
                 }
-                else if (data.type === 'validation_update') {
-                    const modelIndex = data.data.model_id - 1;
-                    // Add validation points
-                    Plotly.addTraces('lossChart', {
-                        x: [data.data.step],
-                        y: [data.data.val_loss],
-                        name: `Model ${data.data.model_id} Validation Loss`,
-                        mode: 'markers',
-                        marker: { size: 8 }
-                    });
-                    Plotly.addTraces('accuracyChart', {
-                        x: [data.data.step],
-                        y: [data.data.val_acc],
-                        name: `Model ${data.data.model_id} Validation Accuracy`,
-                        mode: 'markers',
-                        marker: { size: 8 }
-                    });
                 }
-                else if (data.type === 'comparison_complete') {
                     document.getElementById('startComparison').disabled = false;
                     document.getElementById('stopComparison').disabled = true;
                 }
             };
-            // Start comparison
-            ws.send(JSON.stringify({
-                type: 'start_comparison',
-                model1: model1Config,
-                model2: model2Config
-            }));
         }
         function stopComparison() {

     <div class="container">
         <h1>Compare Models</h1>
         <div class="models-grid">
+            <!-- Model A Configuration -->
             <div class="model-config">
+                <h3>Model A</h3>
                 <div class="network-config">
                     <h4>Network Architecture</h4>
                     <div class="block-config">
                 </div>
             </div>
+            <!-- Model B Configuration -->
             <div class="model-config">
+                <h3>Model B</h3>
                 <div class="network-config">
                     <h4>Network Architecture</h4>
                     <div class="block-config">
             <div id="lossChart"></div>
             <div id="accuracyChart"></div>
         </div>
+        <!-- Training status: progress text written by the WebSocket message handler -->
+        <div class="training-status">
+            <p id="training-progress"></p>
+        </div>
+        <!-- Inference link: hidden by default, shown once training reports completion -->
+        <div class="inference-controls" style="display: none;">
+            <button id="goToInference" onclick="window.location.href='/inference'" class="inference-button">
+                Try Model Inference
+            </button>
+        </div>
     </div>
     <style>
         .config-item .section-title {
             margin-bottom: 5px;
         }
+        .training-status {
+            text-align: center;
+            margin: 20px 0;
+            font-weight: bold;
+        }
+        .inference-controls {
+            margin: 20px 0;
+            text-align: center;
+        }
+        .inference-button {
+            background-color: #28a745;
+            padding: 12px 24px;
+            font-size: 1.1em;
+            transition: background-color 0.3s;
+        }
+        .inference-button:hover {
+            background-color: #218838;
+        }
     </style>
     <script>
                 {
                     x: [],
                     y: [],
+                    name: 'Model A Training Loss',
                     type: 'scatter'
                 },
                 {
                     x: [],
                     y: [],
+                    name: 'Model B Training Loss',
                     type: 'scatter'
                 }
             ];
                 {
                     x: [],
                     y: [],
+                    name: 'Model A Training Accuracy',
                     type: 'scatter'
                 },
                 {
                     x: [],
                     y: [],
+                    name: 'Model B Training Accuracy',
                     type: 'scatter'
                 }
             ];
             // Setup WebSocket connection. NOTE(review): the scheme is hard-coded to ws://; a page served over https would presumably need wss:// (confirm for the deployment)
             ws = new WebSocket(`ws://${window.location.host}/ws/compare`);
+            ws.onopen = function() {
+                console.log('WebSocket connection established');
+                // Build and send the start request here, i.e. only once the socket has opened
+                const message = {
+                    action: 'start_training',
+                    parameters: {
+                        model_params: {
+                            model_a: model1Config, // model1Config / model2Config come from the enclosing scope
+                            model_b: model2Config
+                        },
+                        dataset_params: {
+                            batch_size: model1Config.batch_size, // dataset batch size is taken from model 1's config only
+                            shuffle: true
+                        }
+                    }
+                };
+                console.log('Sending message:', message);
+                ws.send(JSON.stringify(message));
+            };
             ws.onmessage = function(event) { // handles one server message, branching on data.status
+                console.log('Received message:', event.data);
                 const data = JSON.parse(event.data); // NOTE(review): not wrapped in try/catch, so a non-JSON frame throws inside this handler
+                if (data.status === 'training') {
+                    const modelIndex = data.model === 'A' ? 0 : 1; // trace 0 for model 'A', trace 1 for any other value
+                    const iteration = data.metrics.iteration;
+                    console.log(`Updating charts for model ${data.model} at iteration ${iteration}`);
+                    // Append (iteration, loss) to the selected trace of the loss chart
                     Plotly.extendTraces('lossChart', {
+                        x: [[iteration]],
+                        y: [[data.metrics.loss]]
                     }, [modelIndex]);
+                    // Append (iteration, accuracy) to the selected trace of the accuracy chart
                     Plotly.extendTraces('accuracyChart', {
+                        x: [[iteration]],
+                        y: [[data.metrics.accuracy]]
                     }, [modelIndex]);
+                    // Update progress text (skipped if the element is missing): model, epoch as data.epoch + 1, iteration/total, percent to one decimal, batch size
+                    const progressText = document.getElementById('training-progress');
+                    if (progressText) {
+                        const progress = (data.metrics.iteration / data.metrics.total_iterations * 100).toFixed(1);
+                        progressText.textContent =
+                            `Training Model ${data.model} - ` +
+                            `Epoch ${data.epoch + 1} - ` +
+                            `Iteration ${data.metrics.iteration}/${data.metrics.total_iterations} ` +
+                            `(${progress}%) - ` +
+                            `Batch Size: ${data.batch_size}`;
+                    }
                 }
+                else if (data.status === 'complete') {
+                    document.getElementById('startComparison').disabled = false; // re-enable Start, disable Stop
+                    document.getElementById('stopComparison').disabled = true;
+                    const progressText = document.getElementById('training-progress');
+                    if (progressText) {
+                        progressText.textContent = 'Training Complete!';
+                    }
+                    // Reveal the first element matching .inference-controls (not null-checked)
+                    document.querySelector('.inference-controls').style.display = 'block';
                 }
+                else if (data.status === 'error') {
+                    console.error('Training error:', data.message);
+                    alert(`Training error: ${data.message}`);
                     document.getElementById('startComparison').disabled = false; // same button reset as the 'complete' branch
                     document.getElementById('stopComparison').disabled = true;
                 }
             };
+            ws.onerror = function(error) { // log the socket error and reset the buttons; no alert is shown here
+                console.error('WebSocket error:', error);
+                document.getElementById('startComparison').disabled = false; // re-enable Start
+                document.getElementById('stopComparison').disabled = true; // disable Stop
+            };
+            ws.onclose = function(event) { // runs whenever the socket closes; logs the event and resets the buttons
+                console.log('WebSocket connection closed:', event);
+                document.getElementById('startComparison').disabled = false; // re-enable Start
+                document.getElementById('stopComparison').disabled = true; // disable Stop
+            };
         }
         function stopComparison() {