CHE-Master / models /resnet.py
Elfenreigen's picture
Upload 8 files
09cc9e3 verified
from functools import partial
from typing import Type, Any, Callable, Union, List, Optional
import torch
import torch.nn as nn
from torch import Tensor
from torchvision.transforms._presets import ImageClassification
from torchvision.utils import _log_api_usage_once
from torchvision.models._api import WeightsEnum, Weights
from torchvision.models._meta import _IMAGENET_CATEGORIES
from torchvision.models._utils import handle_legacy_interface, _ovewrite_named_param
import math
import torch.nn.functional as F
import random
from torch.nn.common_types import _size_1_t, _size_2_t, _size_3_t
class LoRALayer(nn.Module):
"""
Base lora class
"""
def __init__(
self,
r,
lora_alpha,
):
super().__init__()
self.r = r
self.lora_alpha = lora_alpha
# Mark the weight as unmerged
self.merged = False
def reset_parameters(self):
raise NotImplementedError
def train(self, mode:bool = True):
raise NotImplementedError
def eval(self):
raise NotImplementedError
class LoRALinear(LoRALayer):
def __init__(self, r, lora_alpha, linear_layer):
"""
LoRA class for nn.Linear class
:param r: low rank dimension
:param lora_alpha: scaling factor
:param linear_layer: target nn.Linear layer for applying Lora
"""
super().__init__(r, lora_alpha)
self.linear = linear_layer
in_features = self.linear.in_features
out_features = self.linear.out_features
# Lora configuration
self.lora_A = nn.Parameter(self.linear.weight.new_zeros((r, in_features)))
self.lora_B = nn.Parameter(self.linear.weight.new_zeros((out_features, r)))
self.scaling = self.lora_alpha / self.r
self.reset_parameters()
def reset_parameters(self):
nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5))
nn.init.zeros_(self.lora_B)
def train(self, mode:bool = True):
self.linear.train(mode)
if self.merged:
self.linear.weight.data -= (self.lora_B @ self.lora_A) * self.scaling
self.merged = False
def eval(self):
self.linear.eval()
if not self.merged:
self.linear.weight.data += (self.lora_B @ self.lora_A) * self.scaling
self.merged = True
def forward(self, x):
if not self.merged:
result = F.linear(x, self.linear.weight, bias=self.linear.bias)
out = (x @ self.lora_A.T @ self.lora_B.T)
result += out
return result
else:
return F.linear(x, self.linear.weight, bias=self.linear.bias)
# class LoraConv2d(LoRALayer):
# def __init__(self, r, lora_alpha, conv_layer):
# """
# LoRA class for nn.Conv2d class
# """
# super().__init__(r, lora_alpha)
# self.conv = conv_layer
# in_channels = self.conv.in_channels
# out_channels = self.conv.out_channels
# kernel_size = self.conv.kernel_size[0]
# # lora configuration
# self.lora_A = nn.Parameter(
# self.conv.weight.new_zeros((r * kernel_size, in_channels * kernel_size))
# )
# self.lora_B = nn.Parameter(
# self.conv.weight.new_zeros((out_channels * kernel_size, r * kernel_size))
# )
# self.scaling = self.lora_alpha / self.r
# self.reset_parameters()
# def reset_parameters(self):
# nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5))
# nn.init.zeros_(self.lora_B)
# def train(self, mode: bool = True):
# self.conv.train(mode)
# if self.merged:
# # Make sure that the weights are not merged
# self.conv.weight.data -= (self.lora_B @ self.lora_A).view(self.conv.weight.shape) * self.scaling
# self.merged = False
# def eval(self):
# self.conv.eval()
# if not self.merged:
# # Merge the weights and mark it
# self.conv.weight.data += (self.lora_B @ self.lora_A).view(self.conv.weight.shape) * self.scaling
# self.merged = True
# def forward(self, x):
# if not self.merged:
# return F.conv2d(
# x,
# self.conv.weight + (self.lora_B @ self.lora_A).view(self.conv.weight.shape) * self.scaling,
# self.conv.bias, self.conv.stride, self.conv.padding, self.conv.dilation, self.conv.groups
# )
# return self.conv(x)
class LoraConv2d(nn.Conv2d):
def __init__(
self,
r: int,
lora_alpha: float,
in_channels: int,
out_channels: int,
kernel_size: _size_2_t,
stride: _size_2_t = 1,
padding: Union[str, _size_2_t] = 0,
dilation: _size_2_t = 1,
groups: int = 1,
bias: bool = True,
padding_mode: str = 'zeros', # TODO: refine this type
device=None,
dtype=None
):
"""
LoRA class for nn.Conv2d class
"""
super().__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias, padding_mode, device, dtype)
self.r = r
self.lora_alpha = lora_alpha
# lora configuration
self.lora_A = nn.Parameter(
self.weight.new_zeros((r * kernel_size, in_channels * kernel_size))
)
self.lora_B = nn.Parameter(
self.weight.new_zeros((out_channels * kernel_size, r * kernel_size))
)
self.scaling = self.lora_alpha / self.r
self.reset_parameters_lora()
self.merged = False
self.drop_lora_rate = 0.9
def reset_parameters_lora(self):
nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5))
nn.init.zeros_(self.lora_B)
def train(self, mode: bool = True):
super().train(mode)
if self.merged:
# Make sure that the weights are not merged
self.weight.data -= (self.lora_B @ self.lora_A).view(self.weight.shape) * self.scaling
self.merged = False
def eval(self):
super().eval()
if not self.merged:
# Merge the weights and mark it
self.weight.data += (self.lora_B @ self.lora_A).view(self.weight.shape) * self.scaling
self.merged = True
def forward(self, x):
# 产生一个随机数
# drop_rate = random.random()
# # 训练过程中以一定的概率不使用lora
# if drop_rate <= self.drop_lora_rate and self.training:
# return F.conv2d(
# x,
# self.weight,
# self.bias, self.stride, self.padding, self.dilation, self.groups
# )
# else:
return F.conv2d(
x,
self.weight + (self.lora_B @ self.lora_A).view(self.weight.shape) * self.scaling,
self.bias, self.stride, self.padding, self.dilation, self.groups
)
class MultiLoRALinear(LoRALayer):
def __init__(self, r, lora_alpha, linear_layer,lora_num):
"""
LoRA class for nn.Linear class
:param r: low rank dimension
:param lora_alpha: scaling factor
:param linear_layer: target nn.Linear layer for applying Lora
"""
super().__init__(r,lora_alpha)
self.linear = linear_layer
self.lora_num = lora_num
self.r_list = r
in_features = self.linear.in_features
out_features = self.linear.out_features
# Lora configuration
self.lora_A_list = nn.ParameterList([nn.Parameter(self.linear.weight.new_zeros((self.r_list[th], in_features))) for th in range(self.lora_num)])
self.lora_B_list = nn.ParameterList([nn.Parameter(self.linear.weight.new_zeros((out_features, self.r_list[th]))) for th in range(self.lora_num)])
# self.lora_A = nn.Parameter(self.linear.weight.new_zeros((r, in_features)))
# self.lora_B = nn.Parameter(self.linear.weight.new_zeros((out_features, r)))
self.scaling = [self.lora_alpha / self.r_list[th] for th in range(self.lora_num)]
self.reset_parameters()
def reset_parameters(self):
for th in range(self.lora_num):
nn.init.kaiming_uniform_(self.lora_A_list[th], a=math.sqrt(5))
nn.init.zeros_(self.lora_B_list[th])
def train(self, mode:bool = True):
self.linear.train(mode)
def eval(self):
self.linear.eval()
def forward(self, x, weights):
if not self.merged:
result = F.linear(x, self.linear.weight, bias=self.linear.bias) # (247, batch, 768)
out_stack = torch.stack([(x @ self.lora_A_list[th].T @ self.lora_B_list[th].T) * self.scaling[th] for th in range(self.lora_num)], dim=2) # (2353,16,3,768)
# (247, batch, lora_num, 768)
# weights = weights.unsqueeze(0).unsqueeze(-1)
# (1, batch, lora_num, 1)
# out = torch.sum(out_stack * weights,dim=2)
out = torch.sum(out_stack, dim=2)
# (247, batch, 768)
result += out
# (247, batch, 768)
return result
else:
return F.linear(x, self.linear.weight, bias=self.linear.bias)
class MultiLoraConv2d(LoRALayer):
def __init__(self, r, lora_alpha, conv_layer, num_task):
"""
LoRA class for nn.Conv2d class
"""
super().__init__(r, lora_alpha)
self.conv = conv_layer
self.num_task = num_task
in_channels = self.conv.in_channels
out_channels = self.conv.out_channels
kernel_size = self.conv.kernel_size[0]
# lora configuration
self.lora_A_list = nn.ParameterList([nn.Parameter(self.conv.weight.new_zeros((r * kernel_size, in_channels * kernel_size))) for th in range(num_task)])
self.lora_B_list = nn.ParameterList([nn.Parameter(self.conv.weight.new_zeros((out_channels * kernel_size, r * kernel_size))) for th in range(num_task)])
self.scaling = self.lora_alpha / self.r
self.reset_parameters()
self.merged = False
self.label_batch = None
def reset_parameters(self):
for th in range(self.num_task):
nn.init.kaiming_uniform_(self.lora_A_list[th], a=math.sqrt(5))
nn.init.zeros_(self.lora_B_list[th])
def train(self, mode: bool = True):
self.conv.train(mode)
def eval(self):
self.conv.eval()
def forward(self, input_x, alphas=None):
if not self.merged:
conv_weight_stack = torch.cat([(self.lora_B_list[th] @ self.lora_A_list[th]).view(self.conv.weight.shape).unsqueeze(0) * self.scaling for th in range(self.num_task)], dim=0)
if isinstance(input_x, dict):
# print('input is dict')
x, alphas = input_x[0], input_x[1]
else:
x = input_x
batch_size, c = x.shape[0], x.shape[1]
# print(alphas)
if alphas==None:
print('在lora_fast里才是none')
agg_weights = self.conv.weight + torch.sum(
torch.mul(conv_weight_stack.unsqueeze(0), alphas.view(batch_size, -1, 1, 1, 1, 1)), dim=1)
agg_weights = agg_weights.view(-1, *agg_weights.shape[-3:])
x_grouped = x.view(1, -1, *x.shape[-2:])
outputs = F.conv2d(x_grouped, agg_weights, self.conv.bias, self.conv.stride, self.conv.padding, self.conv.dilation, groups=batch_size)
outputs = outputs.view(batch_size, -1, *outputs.shape[-2:])
return outputs
else:
return self.conv(x)
def merged_weight(self, th): # only for test
self.conv.weight.data += (self.lora_B_list[th] @ self.lora_A_list[th]).view(self.conv.weight.shape) * self.scaling
self.merged = True
__all__ = [
"ResNet",
"ResNet18_Weights",
"ResNet34_Weights",
"ResNet50_Weights",
"ResNet101_Weights",
"ResNet152_Weights",
"ResNeXt50_32X4D_Weights",
"ResNeXt101_32X8D_Weights",
"ResNeXt101_64X4D_Weights",
"Wide_ResNet50_2_Weights",
"Wide_ResNet101_2_Weights",
"resnet18",
"resnet34",
"resnet50",
"resnet101",
"resnet152",
"resnext50_32x4d",
"resnext101_32x8d",
"resnext101_64x4d",
"wide_resnet50_2",
"wide_resnet101_2",
]
def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
"""3x3 convolution with padding"""
return nn.Conv2d(
in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=dilation,
groups=groups,
bias=False,
dilation=dilation,
)
def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
def conv3x3_lora(r: int, lora_alpha: float, in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
"""3x3 convolution with padding"""
return LoraConv2d(
r,lora_alpha,
in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=dilation,
groups=groups,
bias=False,
dilation=dilation,
)
def conv1x1_lora(r: int, lora_alpha: float, in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
"""1x1 convolution"""
return LoraConv2d(r, lora_alpha, in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class BasicBlock_Lora(nn.Module):
expansion: int = 1
def __init__(
self,
inplanes: int,
planes: int,
r: int,
lora_alpha: float,
stride: int = 1,
downsample: Optional[nn.Module] = None,
groups: int = 1,
base_width: int = 64,
dilation: int = 1,
norm_layer: Optional[Callable[..., nn.Module]] = None,
) -> None:
super().__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
if groups != 1 or base_width != 64:
raise ValueError("BasicBlock only supports groups=1 and base_width=64")
if dilation > 1:
raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3_lora(r, lora_alpha, inplanes, planes, stride)
self.bn1 = norm_layer(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3_lora(r, lora_alpha, planes, planes)
self.bn2 = norm_layer(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x: Tensor) -> Tensor:
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class BasicBlock(nn.Module):
expansion: int = 1
def __init__(
self,
inplanes: int,
planes: int,
stride: int = 1,
downsample: Optional[nn.Module] = None,
groups: int = 1,
base_width: int = 64,
dilation: int = 1,
norm_layer: Optional[Callable[..., nn.Module]] = None,
) -> None:
super().__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
if groups != 1 or base_width != 64:
raise ValueError("BasicBlock only supports groups=1 and base_width=64")
if dilation > 1:
raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = norm_layer(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = norm_layer(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x: Tensor) -> Tensor:
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
# Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
# while original implementation places the stride at the first 1x1 convolution(self.conv1)
# according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
# This variant is also known as ResNet V1.5 and improves accuracy according to
# https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
expansion: int = 4
def __init__(
self,
inplanes: int,
planes: int,
stride: int = 1,
downsample: Optional[nn.Module] = None,
groups: int = 1,
base_width: int = 64,
dilation: int = 1,
norm_layer: Optional[Callable[..., nn.Module]] = None,
) -> None:
super().__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.0)) * groups
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x: Tensor) -> Tensor:
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class Bottleneck_Lora(nn.Module):
# Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
# while original implementation places the stride at the first 1x1 convolution(self.conv1)
# according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
# This variant is also known as ResNet V1.5 and improves accuracy according to
# https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
expansion: int = 4
def __init__(
self,
inplanes: int,
planes: int,
r: int,
lora_alpha: float,
stride: int = 1,
downsample: Optional[nn.Module] = None,
groups: int = 1,
base_width: int = 64,
dilation: int = 1,
norm_layer: Optional[Callable[..., nn.Module]] = None,
) -> None:
super().__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.0)) * groups
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1_lora(r, lora_alpha, inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3_lora(r, lora_alpha, width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1_lora(r, lora_alpha, width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x: Tensor) -> Tensor:
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(
self,
block: Type[Union[BasicBlock, Bottleneck]],
layers: List[int],
num_classes: int = 1000,
zero_init_residual: bool = False,
groups: int = 1,
width_per_group: int = 64,
replace_stride_with_dilation: Optional[List[bool]] = None,
norm_layer: Optional[Callable[..., nn.Module]] = None,
) -> None:
super().__init__()
_log_api_usage_once(self)
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError(
"replace_stride_with_dilation should be None "
f"or a 3-element tuple, got {replace_stride_with_dilation}"
)
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck) and m.bn3.weight is not None:
nn.init.constant_(m.bn3.weight, 0) # type: ignore[arg-type]
elif isinstance(m, BasicBlock) and m.bn2.weight is not None:
nn.init.constant_(m.bn2.weight, 0) # type: ignore[arg-type]
def _make_layer(
self,
block: Type[Union[BasicBlock, Bottleneck]],
planes: int,
blocks: int,
stride: int = 1,
dilate: bool = False,
) -> nn.Sequential:
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(
block(
self.inplanes, planes, stride, downsample, self.groups, self.base_width, previous_dilation, norm_layer
)
)
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(
block(
self.inplanes,
planes,
groups=self.groups,
base_width=self.base_width,
dilation=self.dilation,
norm_layer=norm_layer,
)
)
return nn.Sequential(*layers)
def _forward_impl(self, x: Tensor) -> Tensor:
# See note [TorchScript super()]
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = torch.flatten(x, 1)
x = self.fc(x)
return x
def forward(self, x: Tensor) -> Tensor:
return self._forward_impl(x)
class ResNet_Lora(nn.Module):
def __init__(
self,
block: Type[Union[BasicBlock, Bottleneck]],
layers: List[int],
r: int,
lora_alpha: float,
num_classes: int = 1000,
zero_init_residual: bool = False,
groups: int = 1,
width_per_group: int = 64,
replace_stride_with_dilation: Optional[List[bool]] = None,
norm_layer: Optional[Callable[..., nn.Module]] = None,
) -> None:
super().__init__()
_log_api_usage_once(self)
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError(
"replace_stride_with_dilation should be None "
f"or a 3-element tuple, got {replace_stride_with_dilation}"
)
self.groups = groups
self.base_width = width_per_group
self.r = r
self.lora_alpha = lora_alpha
self.conv1 = LoraConv2d(self.r, self.lora_alpha, 3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
elif isinstance(m, LoraConv2d):
nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck) and m.bn3.weight is not None:
nn.init.constant_(m.bn3.weight, 0) # type: ignore[arg-type]
elif isinstance(m, BasicBlock) and m.bn2.weight is not None:
nn.init.constant_(m.bn2.weight, 0) # type: ignore[arg-type]
def _make_layer(
self,
block: Type[Union[BasicBlock, Bottleneck]],
planes: int,
blocks: int,
stride: int = 1,
dilate: bool = False,
) -> nn.Sequential:
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1_lora(self.r, self.lora_alpha, self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(
block(
self.inplanes, planes, self.r, self.lora_alpha, stride, downsample, self.groups, self.base_width, previous_dilation, norm_layer
)
)
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(
block(
self.inplanes,
planes,
self.r,
self.lora_alpha,
groups=self.groups,
base_width=self.base_width,
dilation=self.dilation,
norm_layer=norm_layer,
)
)
return nn.Sequential(*layers)
def _forward_impl(self, x: Tensor) -> Tensor:
# See note [TorchScript super()]
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = torch.flatten(x, 1)
x = self.fc(x)
return x
def forward(self, x: Tensor) -> Tensor:
return self._forward_impl(x)
def _resnet(
block: Type[Union[BasicBlock, Bottleneck]],
layers: List[int],
weights: Optional[WeightsEnum],
progress: bool,
**kwargs: Any,
) -> ResNet:
if weights is not None:
_ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"]))
model = ResNet(block, layers, **kwargs)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
return model
def _resnet_lora(
block: Type[Union[BasicBlock, Bottleneck]],
layers: List[int],
r: int,
lora_alpha: float,
weights: Optional[WeightsEnum],
progress: bool,
**kwargs: Any,
) -> ResNet_Lora:
if weights is not None:
_ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"]))
model = ResNet_Lora(block, layers, r, lora_alpha, **kwargs)
if weights is not None:
missing_keys, unexpected_keys = model.load_state_dict(weights.get_state_dict(progress=progress), strict=False)
for key_name in missing_keys:
if 'lora_A' in key_name or 'lora_B' in key_name:
pass
else:
raise ValueError(f'{key_name} in missing keys')
if unexpected_keys != []:
raise ValueError(f'Have unexpected keys {unexpected_keys}')
return model
_COMMON_META = {
"min_size": (1, 1),
"categories": _IMAGENET_CATEGORIES,
}
class ResNet18_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/resnet18-f37072fd.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
**_COMMON_META,
"num_params": 11689512,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet",
"_metrics": {
"ImageNet-1K": {
"acc@1": 69.758,
"acc@5": 89.078,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
DEFAULT = IMAGENET1K_V1
class ResNet34_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/resnet34-b627a593.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
**_COMMON_META,
"num_params": 21797672,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet",
"_metrics": {
"ImageNet-1K": {
"acc@1": 73.314,
"acc@5": 91.420,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
DEFAULT = IMAGENET1K_V1
class ResNet50_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/resnet50-0676ba61.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
**_COMMON_META,
"num_params": 25557032,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet",
"_metrics": {
"ImageNet-1K": {
"acc@1": 76.130,
"acc@5": 92.862,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
url="https://download.pytorch.org/models/resnet50-11ad3fa6.pth",
transforms=partial(ImageClassification, crop_size=224, resize_size=232),
meta={
**_COMMON_META,
"num_params": 25557032,
"recipe": "https://github.com/pytorch/vision/issues/3995#issuecomment-1013906621",
"_metrics": {
"ImageNet-1K": {
"acc@1": 80.858,
"acc@5": 95.434,
}
},
"_docs": """
These weights improve upon the results of the original paper by using TorchVision's `new training recipe
<https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
""",
},
)
DEFAULT = IMAGENET1K_V2
class ResNet101_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/resnet101-63fe2227.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
**_COMMON_META,
"num_params": 44549160,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet",
"_metrics": {
"ImageNet-1K": {
"acc@1": 77.374,
"acc@5": 93.546,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
url="https://download.pytorch.org/models/resnet101-cd907fc2.pth",
transforms=partial(ImageClassification, crop_size=224, resize_size=232),
meta={
**_COMMON_META,
"num_params": 44549160,
"recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe",
"_metrics": {
"ImageNet-1K": {
"acc@1": 81.886,
"acc@5": 95.780,
}
},
"_docs": """
These weights improve upon the results of the original paper by using TorchVision's `new training recipe
<https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
""",
},
)
DEFAULT = IMAGENET1K_V2
class ResNet152_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/resnet152-394f9c45.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
**_COMMON_META,
"num_params": 60192808,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet",
"_metrics": {
"ImageNet-1K": {
"acc@1": 78.312,
"acc@5": 94.046,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
url="https://download.pytorch.org/models/resnet152-f82ba261.pth",
transforms=partial(ImageClassification, crop_size=224, resize_size=232),
meta={
**_COMMON_META,
"num_params": 60192808,
"recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe",
"_metrics": {
"ImageNet-1K": {
"acc@1": 82.284,
"acc@5": 96.002,
}
},
"_docs": """
These weights improve upon the results of the original paper by using TorchVision's `new training recipe
<https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
""",
},
)
DEFAULT = IMAGENET1K_V2
class ResNeXt50_32X4D_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
**_COMMON_META,
"num_params": 25028904,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnext",
"_metrics": {
"ImageNet-1K": {
"acc@1": 77.618,
"acc@5": 93.698,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
url="https://download.pytorch.org/models/resnext50_32x4d-1a0047aa.pth",
transforms=partial(ImageClassification, crop_size=224, resize_size=232),
meta={
**_COMMON_META,
"num_params": 25028904,
"recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe",
"_metrics": {
"ImageNet-1K": {
"acc@1": 81.198,
"acc@5": 95.340,
}
},
"_docs": """
These weights improve upon the results of the original paper by using TorchVision's `new training recipe
<https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
""",
},
)
DEFAULT = IMAGENET1K_V2
class ResNeXt101_32X8D_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
**_COMMON_META,
"num_params": 88791336,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnext",
"_metrics": {
"ImageNet-1K": {
"acc@1": 79.312,
"acc@5": 94.526,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
url="https://download.pytorch.org/models/resnext101_32x8d-110c445d.pth",
transforms=partial(ImageClassification, crop_size=224, resize_size=232),
meta={
**_COMMON_META,
"num_params": 88791336,
"recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres",
"_metrics": {
"ImageNet-1K": {
"acc@1": 82.834,
"acc@5": 96.228,
}
},
"_docs": """
These weights improve upon the results of the original paper by using TorchVision's `new training recipe
<https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
""",
},
)
DEFAULT = IMAGENET1K_V2
class ResNeXt101_64X4D_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/resnext101_64x4d-173b62eb.pth",
transforms=partial(ImageClassification, crop_size=224, resize_size=232),
meta={
**_COMMON_META,
"num_params": 83455272,
"recipe": "https://github.com/pytorch/vision/pull/5935",
"_metrics": {
"ImageNet-1K": {
"acc@1": 83.246,
"acc@5": 96.454,
}
},
"_docs": """
These weights were trained from scratch by using TorchVision's `new training recipe
<https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
""",
},
)
DEFAULT = IMAGENET1K_V1
class Wide_ResNet50_2_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
**_COMMON_META,
"num_params": 68883240,
"recipe": "https://github.com/pytorch/vision/pull/912#issue-445437439",
"_metrics": {
"ImageNet-1K": {
"acc@1": 78.468,
"acc@5": 94.086,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
url="https://download.pytorch.org/models/wide_resnet50_2-9ba9bcbe.pth",
transforms=partial(ImageClassification, crop_size=224, resize_size=232),
meta={
**_COMMON_META,
"num_params": 68883240,
"recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres",
"_metrics": {
"ImageNet-1K": {
"acc@1": 81.602,
"acc@5": 95.758,
}
},
"_docs": """
These weights improve upon the results of the original paper by using TorchVision's `new training recipe
<https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
""",
},
)
DEFAULT = IMAGENET1K_V2
class Wide_ResNet101_2_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
url="https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
**_COMMON_META,
"num_params": 126886696,
"recipe": "https://github.com/pytorch/vision/pull/912#issue-445437439",
"_metrics": {
"ImageNet-1K": {
"acc@1": 78.848,
"acc@5": 94.284,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
IMAGENET1K_V2 = Weights(
url="https://download.pytorch.org/models/wide_resnet101_2-d733dc28.pth",
transforms=partial(ImageClassification, crop_size=224, resize_size=232),
meta={
**_COMMON_META,
"num_params": 126886696,
"recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe",
"_metrics": {
"ImageNet-1K": {
"acc@1": 82.510,
"acc@5": 96.020,
}
},
"_docs": """
These weights improve upon the results of the original paper by using TorchVision's `new training recipe
<https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
""",
},
)
DEFAULT = IMAGENET1K_V2
@handle_legacy_interface(weights=("pretrained", ResNet18_Weights.IMAGENET1K_V1))
def resnet18(*, weights: Optional[ResNet18_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet:
weights = ResNet18_Weights.verify(weights)
return _resnet(BasicBlock, [2, 2, 2, 2], weights, progress, **kwargs)
@handle_legacy_interface(weights=("pretrained", ResNet34_Weights.IMAGENET1K_V1))
def resnet34(*, weights: Optional[ResNet34_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet:
weights = ResNet34_Weights.verify(weights)
return _resnet(BasicBlock, [3, 4, 6, 3], weights, progress, **kwargs)
@handle_legacy_interface(weights=("pretrained", ResNet50_Weights.IMAGENET1K_V1))
def resnet50(*, weights: Optional[ResNet50_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet:
weights = ResNet50_Weights.verify(weights)
return _resnet(Bottleneck, [3, 4, 6, 3], weights, progress, **kwargs)
@handle_legacy_interface(weights=("pretrained", ResNet50_Weights.IMAGENET1K_V1))
def resnet50_lora(*, r: int, lora_alpha: float, weights: Optional[ResNet50_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet:
weights = ResNet50_Weights.verify(weights)
return _resnet_lora(Bottleneck_Lora, [3, 4, 6, 3], r, lora_alpha, weights, progress, **kwargs)
@handle_legacy_interface(weights=("pretrained", ResNet101_Weights.IMAGENET1K_V1))
def resnet101(*, weights: Optional[ResNet101_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet:
weights = ResNet101_Weights.verify(weights)
return _resnet(Bottleneck, [3, 4, 23, 3], weights, progress, **kwargs)
@handle_legacy_interface(weights=("pretrained", ResNet101_Weights.IMAGENET1K_V1))
def resnet101_lora(*, r: int, lora_alpha: float, weights: Optional[ResNet101_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet:
weights = ResNet101_Weights.verify(weights)
return _resnet_lora(Bottleneck_Lora, [3, 4, 23, 3], r, lora_alpha, weights, progress, **kwargs)
@handle_legacy_interface(weights=("pretrained", ResNet152_Weights.IMAGENET1K_V1))
def resnet152(*, weights: Optional[ResNet152_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet:
weights = ResNet152_Weights.verify(weights)
return _resnet(Bottleneck, [3, 8, 36, 3], weights, progress, **kwargs)
@handle_legacy_interface(weights=("pretrained", ResNet152_Weights.IMAGENET1K_V1))
def resnet152_lora(*, r: int, lora_alpha: float, weights: Optional[ResNet152_Weights] = None, progress: bool = True, **kwargs: Any) -> ResNet:
weights = ResNet152_Weights.verify(weights)
return _resnet_lora(Bottleneck_Lora, [3, 8, 36, 3], r, lora_alpha, weights, progress, **kwargs)
@handle_legacy_interface(weights=("pretrained", ResNeXt50_32X4D_Weights.IMAGENET1K_V1))
def resnext50_32x4d(
*, weights: Optional[ResNeXt50_32X4D_Weights] = None, progress: bool = True, **kwargs: Any
) -> ResNet:
weights = ResNeXt50_32X4D_Weights.verify(weights)
_ovewrite_named_param(kwargs, "groups", 32)
_ovewrite_named_param(kwargs, "width_per_group", 4)
return _resnet(Bottleneck, [3, 4, 6, 3], weights, progress, **kwargs)
@handle_legacy_interface(weights=("pretrained", ResNeXt101_32X8D_Weights.IMAGENET1K_V1))
def resnext101_32x8d(
*, weights: Optional[ResNeXt101_32X8D_Weights] = None, progress: bool = True, **kwargs: Any
) -> ResNet:
weights = ResNeXt101_32X8D_Weights.verify(weights)
_ovewrite_named_param(kwargs, "groups", 32)
_ovewrite_named_param(kwargs, "width_per_group", 8)
return _resnet(Bottleneck, [3, 4, 23, 3], weights, progress, **kwargs)
def resnext101_64x4d(
*, weights: Optional[ResNeXt101_64X4D_Weights] = None, progress: bool = True, **kwargs: Any
) -> ResNet:
weights = ResNeXt101_64X4D_Weights.verify(weights)
_ovewrite_named_param(kwargs, "groups", 64)
_ovewrite_named_param(kwargs, "width_per_group", 4)
return _resnet(Bottleneck, [3, 4, 23, 3], weights, progress, **kwargs)
@handle_legacy_interface(weights=("pretrained", Wide_ResNet50_2_Weights.IMAGENET1K_V1))
def wide_resnet50_2(
*, weights: Optional[Wide_ResNet50_2_Weights] = None, progress: bool = True, **kwargs: Any
) -> ResNet:
weights = Wide_ResNet50_2_Weights.verify(weights)
_ovewrite_named_param(kwargs, "width_per_group", 64 * 2)
return _resnet(Bottleneck, [3, 4, 6, 3], weights, progress, **kwargs)
@handle_legacy_interface(weights=("pretrained", Wide_ResNet101_2_Weights.IMAGENET1K_V1))
def wide_resnet101_2(
*, weights: Optional[Wide_ResNet101_2_Weights] = None, progress: bool = True, **kwargs: Any
) -> ResNet:
weights = Wide_ResNet101_2_Weights.verify(weights)
_ovewrite_named_param(kwargs, "width_per_group", 64 * 2)
return _resnet(Bottleneck, [3, 4, 23, 3], weights, progress, **kwargs)
# The dictionary below is internal implementation detail and will be removed in v0.15
from torchvision.models._utils import _ModelURLs
model_urls = _ModelURLs(
{
"resnet18": ResNet18_Weights.IMAGENET1K_V1.url,
"resnet34": ResNet34_Weights.IMAGENET1K_V1.url,
"resnet50": ResNet50_Weights.IMAGENET1K_V1.url,
"resnet101": ResNet101_Weights.IMAGENET1K_V1.url,
"resnet152": ResNet152_Weights.IMAGENET1K_V1.url,
"resnext50_32x4d": ResNeXt50_32X4D_Weights.IMAGENET1K_V1.url,
"resnext101_32x8d": ResNeXt101_32X8D_Weights.IMAGENET1K_V1.url,
"wide_resnet50_2": Wide_ResNet50_2_Weights.IMAGENET1K_V1.url,
"wide_resnet101_2": Wide_ResNet101_2_Weights.IMAGENET1K_V1.url,
}
)
if __name__ == '__main__':
model = resnet50_lora(r=16, lora_alpha=16, weights='ResNet50_Weights.IMAGENET1K_V2')