# MnistStudio / scripts/model.py
# Shilpaj's picture
# Feat: Logic for model training and inference
# 6f5f635
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Three-block convolutional classifier for 1-channel 28x28 inputs.

    Each block is ``Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU``. The first
    two blocks are followed by 2x2 max pooling (28x28 -> 14x14 -> 7x7); the
    third block keeps the 7x7 resolution. The flattened features then go
    through dropout, a 256-unit linear layer with BatchNorm1d and ReLU,
    dropout again, and a final 10-way linear layer.

    Args:
        kernels: Output channel counts of the three convolutional blocks.
            Only the first three entries are read.
    """

    def __init__(self, kernels=(32, 64, 128)):
        # The default is a tuple so that no mutable object is shared between
        # instances; lists passed by callers are still accepted.
        super().__init__()

        # First convolutional block
        self.conv1 = nn.Conv2d(1, kernels[0], 3, padding=1)
        self.bn1 = nn.BatchNorm2d(kernels[0])

        # Second convolutional block
        self.conv2 = nn.Conv2d(kernels[0], kernels[1], 3, padding=1)
        self.bn2 = nn.BatchNorm2d(kernels[1])

        # Third convolutional block
        self.conv3 = nn.Conv2d(kernels[1], kernels[2], 3, padding=1)
        self.bn3 = nn.BatchNorm2d(kernels[2])

        # Shared, parameter-free layers (reused at several points in forward).
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.25)

        # Size after convolutions and pooling:
        # the padded 3x3 convolutions keep the spatial size and only the first
        # two blocks are pooled, so 28x28 -> 14x14 -> 7x7.
        # The final feature map is therefore kernels[2] x 7 x 7.
        self.fc1 = nn.Linear(kernels[2] * 7 * 7, 256)
        self.fc1_bn = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 10)

        # Initialize weights
        self._initialize_weights()

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: Input batch; the layer sizes above require shape
                ``(batch, 1, 28, 28)``.

        Returns:
            Tensor of shape ``(batch, 10)`` holding ``log_softmax`` outputs.
        """
        # First conv block
        x = self.pool(F.relu(self.bn1(self.conv1(x))))  # 28x28 -> 14x14

        # Second conv block
        x = self.pool(F.relu(self.bn2(self.conv2(x))))  # 14x14 -> 7x7

        # Third conv block; no pooling here to maintain spatial dimensions
        x = F.relu(self.bn3(self.conv3(x)))

        # Flatten everything except the batch dimension. torch.flatten also
        # handles non-contiguous activations, which a bare .view() rejects.
        x = torch.flatten(x, 1)
        x = self.dropout(x)

        # Fully connected layers
        x = F.relu(self.fc1_bn(self.fc1(x)))
        x = self.dropout(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

    def num_flat_features(self, x):
        """Return the number of elements per sample (product of non-batch dims).

        No longer used by ``forward``; kept so existing callers keep working.
        """
        return x.size()[1:].numel()

    def _initialize_weights(self):
        """Apply Xavier-uniform weights and zero biases; reset batch-norm affine."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                # Xavier initialization for CONV and FC layers
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
                # Unit scale and zero shift. BatchNorm1d is included so that
                # fc1_bn is treated the same way as the 2-D norm layers.
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)