import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small convolutional classifier sized for single-channel 28x28 images.

    Layout:
        * three ``Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU`` blocks, the
          first two each followed by 2x2 max pooling (28 -> 14 -> 7);
        * ``Dropout -> Linear(256) -> BatchNorm1d -> ReLU -> Dropout ->
          Linear(10)``.

    ``forward`` returns per-class log-probabilities (``log_softmax``).
    """

    def __init__(self, kernels=(32, 64, 128)):
        """Build the layers and initialise their weights.

        Args:
            kernels: Indexable collection whose first three entries are the
                output channel counts of the three convolutional blocks.
                The default is a tuple rather than a list so the default
                object cannot be mutated and shared between instances.
        """
        super(Net, self).__init__()
        # First convolutional block (single input channel).
        self.conv1 = nn.Conv2d(1, kernels[0], 3, padding=1)
        self.bn1 = nn.BatchNorm2d(kernels[0])

        # Second convolutional block.
        self.conv2 = nn.Conv2d(kernels[0], kernels[1], 3, padding=1)
        self.bn2 = nn.BatchNorm2d(kernels[1])

        # Third convolutional block.
        self.conv3 = nn.Conv2d(kernels[1], kernels[2], 3, padding=1)
        self.bn3 = nn.BatchNorm2d(kernels[2])

        # Shared (parameter-free) pooling and dropout modules.
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.25)

        # The padded 3x3 convolutions keep the spatial size; only the two
        # pooling steps in ``forward`` shrink it: 28x28 -> 14x14 -> 7x7.
        # The flattened feature vector therefore has kernels[2] * 7 * 7 entries.
        self.fc1 = nn.Linear(kernels[2] * 7 * 7, 256)
        self.fc1_bn = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 10)

        # Initialize weights
        self._initialize_weights()

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Input batch; the layer sizes above expect
                ``(batch, 1, 28, 28)``.

        Returns:
            Tensor of shape ``(batch, 10)`` holding log-probabilities.
        """
        # First conv block
        x = self.pool(F.relu(self.bn1(self.conv1(x))))  # 28x28 -> 14x14

        # Second conv block
        x = self.pool(F.relu(self.bn2(self.conv2(x))))  # 14x14 -> 7x7

        # Third conv block: no pooling, so the 7x7 map is preserved.
        x = F.relu(self.bn3(self.conv3(x)))

        # Flatten everything except the batch dimension. Unlike ``view``,
        # ``flatten`` does not require the feature map to be contiguous.
        x = torch.flatten(x, 1)
        x = self.dropout(x)

        # Fully connected layers
        x = F.relu(self.fc1_bn(self.fc1(x)))
        x = self.dropout(x)

        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

    def num_flat_features(self, x):
        """Return the number of elements per sample, i.e. the product of all
        dimensions of ``x`` except the first (batch) dimension.

        Kept as a public helper for existing callers; ``forward`` no longer
        needs it.
        """
        num_features = 1
        for dim in x.size()[1:]:
            num_features *= dim
        return num_features

    def _initialize_weights(self):
        """Apply Xavier-uniform weights / zero biases to conv and linear
        layers, and unit scale / zero shift to every batch-norm layer."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Xavier initialization for CONV layers
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)):
                # Covers fc1_bn (BatchNorm1d) as well as the conv batch norms.
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                # Xavier initialization for FC layers
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)