|
from transformers import PreTrainedModel |
|
from timm.models.resnet import BasicBlock, Bottleneck, ResNet |
|
|
|
from transformers import PretrainedConfig |
|
from typing import List |
|
import torch |
|
import timm |
|
|
|
|
|
class ViTMAEConfig(PretrainedConfig):
    """Configuration for the timm-backed ViT-MAE image classifier.

    Attributes:
        model_name: Name handed to ``timm.create_model`` to select the
            backbone architecture/checkpoint.
        num_classes: Value forwarded to ``timm.create_model`` as
            ``num_classes`` (the size of the classification head).
    """

    # Identifier string for this configuration family.
    model_type = "vit_mae_custom"

    def __init__(
        self,
        model_name: str = "timm/vit_base_patch16_224.mae",
        num_classes: int = 1000,
        **kwargs,
    ) -> None:
        """Store the custom fields, then let ``PretrainedConfig`` consume the rest.

        Args:
            model_name: timm model identifier.
            num_classes: Number of output classes for the classifier head.
            **kwargs: Any remaining options, passed through unchanged to
                ``PretrainedConfig.__init__``.
        """
        self.model_name = model_name
        self.num_classes = num_classes
        super().__init__(**kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ViTMAEModelForImageClassification(PreTrainedModel):
    """``PreTrainedModel`` wrapper around a timm model for image classification.

    The wrapped network is built by ``timm.create_model`` from the
    ``model_name`` and ``num_classes`` fields of a ``ViTMAEConfig``.
    """

    config_class = ViTMAEConfig

    def __init__(self, config):
        """Build the timm backbone described by ``config``.

        Args:
            config: A ``ViTMAEConfig`` (or compatible object) exposing
                ``model_name`` and ``num_classes``.
        """
        super().__init__(config)

        # NOTE(review): pretrained=True asks timm to load pretrained weights on
        # every construction — confirm this is also desired when the model is
        # subsequently restored from a saved checkpoint.
        self.model = timm.create_model(
            config.model_name,
            num_classes=config.num_classes,
            pretrained=True,
        )

    def forward(self, tensor, labels=None):
        """Run the backbone and optionally compute the classification loss.

        Args:
            tensor: Input batch passed straight to the timm model
                (presumably images shaped ``(batch, channels, height, width)``
                — confirm against the caller).
            labels: Optional targets. When given, a cross-entropy loss between
                the logits and ``labels`` is computed (presumably class indices
                of shape ``(batch,)`` — confirm against the caller).

        Returns:
            ``{"logits": logits}`` when ``labels`` is ``None``; otherwise
            ``{"loss": loss, "logits": logits}``.
        """
        logits = self.model(tensor)

        if labels is None:
            return {"logits": logits}

        # cross_entropy lives in torch.nn.functional; torch.nn has no such
        # attribute, so the previous call raised AttributeError.
        loss = torch.nn.functional.cross_entropy(logits, labels)
        return {"loss": loss, "logits": logits}