CVLFace Pretrained Face Alignment Model (DFA RESNET50)

🌎 GitHub • 🤗 Hugging Face


1. Introduction

Model Name: DFA RESNET50

Related Paper: KeyPoint Relative Position Encoding for Face Recognition (https://arxiv.org/abs/2403.14852)

Please cite the original paper and follow the license of the training dataset.

2. Quick Start

if __name__ == '__main__':
    
from transformers import AutoModel
from huggingface_hub import hf_hub_download
import shutil
import os
import sys


# helper function to download the files of a Hugging Face repo into a local directory
def download(repo_id, path, HF_TOKEN=None):
    """Download the files of a Hugging Face repo into ``path``.

    The repo's ``files.txt`` is fetched first (unless already present in
    ``path``) and read as a newline-separated list of file names. Every
    listed file, plus ``config.json``, ``wrapper.py`` and
    ``model.safetensors``, is then downloaded unless a file with that
    name already exists under ``path``.

    Args:
        repo_id: Hugging Face repo identifier, passed to ``hf_hub_download``.
        path: Local directory to download into; created if missing.
        HF_TOKEN: Optional access token forwarded to ``hf_hub_download``.
    """
    os.makedirs(path, exist_ok=True)
    files_path = os.path.join(path, 'files.txt')
    if not os.path.exists(files_path):
        hf_hub_download(repo_id, 'files.txt', token=HF_TOKEN, local_dir=path, local_dir_use_symlinks=False)
    with open(files_path, 'r', encoding='utf-8') as manifest:
        # Strip each entry so trailing spaces or whitespace-only lines in the
        # manifest do not turn into bogus file names requested from the hub.
        listed = [line.strip() for line in manifest.read().split('\n')]
    required = ['config.json', 'wrapper.py', 'model.safetensors']
    for name in [entry for entry in listed if entry] + required:
        full_path = os.path.join(path, name)
        if not os.path.exists(full_path):
            hf_hub_download(repo_id, name, token=HF_TOKEN, local_dir=path, local_dir_use_symlinks=False)

            
# helper function to load the model from an already-downloaded local directory
def load_model_from_local_path(path, HF_TOKEN=None):
    """Load the model stored in the local directory ``path``.

    While loading, the working directory is switched to ``path`` and
    ``path`` is put at the front of ``sys.path`` (presumably so the repo's
    own code can resolve its relative imports and files -- confirm against
    the repo's ``wrapper.py``). Both are restored afterwards, even when
    loading fails.

    Args:
        path: Directory containing the downloaded repo files.
        HF_TOKEN: Optional access token forwarded to ``from_pretrained``.

    Returns:
        The object returned by ``AutoModel.from_pretrained``.
    """
    cwd = os.getcwd()
    os.chdir(path)
    sys.path.insert(0, path)
    try:
        # NOTE: trust_remote_code=True executes Python code shipped in the
        # repo; only use it with repos you trust.
        return AutoModel.from_pretrained(path, trust_remote_code=True, token=HF_TOKEN)
    finally:
        # Restore process-wide state on success and on failure alike.
        os.chdir(cwd)
        try:
            # Remove our own entry by value: the loaded code may have
            # changed what sits at index 0.
            sys.path.remove(path)
        except ValueError:
            # Entry was already removed by the loaded code; nothing to undo.
            pass


# helper function to download a Hugging Face repo (if needed) and load its model
def load_model_by_repo_id(repo_id, save_path, HF_TOKEN=None, force_download=False):
    """Fetch a repo into ``save_path`` and return the model loaded from it.

    Args:
        repo_id: Hugging Face repo identifier.
        save_path: Local directory used to store the repo files.
        HF_TOKEN: Optional access token passed on to the download and load steps.
        force_download: When true, an existing ``save_path`` is deleted
            first so that every file is downloaded again.

    Returns:
        Whatever ``load_model_from_local_path`` returns for ``save_path``.
    """
    wipe_existing = force_download and os.path.exists(save_path)
    if wipe_existing:
        shutil.rmtree(save_path)

    download(repo_id, save_path, HF_TOKEN)
    model = load_model_from_local_path(save_path, HF_TOKEN)
    return model


if __name__ == '__main__':

    # load model (replace the placeholder with your own Hugging Face token)
    HF_TOKEN = 'YOUR_HUGGINGFACE_TOKEN'
    path = os.path.expanduser('~/.cvlface_cache/minchul/cvlface_DFA_resnet50')
    repo_id = 'minchul/cvlface_DFA_resnet50'
    aligner = load_model_by_repo_id(repo_id, path, HF_TOKEN)

    # input is an RGB image normalized with mean 0.5 / std 0.5 per channel.
    from torchvision.transforms import Compose, ToTensor, Normalize
    from PIL import Image
    # convert('RGB') guarantees exactly three channels, so RGBA / greyscale /
    # palette images do not break the 3-channel Normalize below.
    img = Image.open('/path/to/img.png').convert('RGB')
    trans = Compose([ToTensor(), Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])
    input = trans(img).unsqueeze(0)  # torch.randn(1, 3, 256, 256) or any size with a single face

    # predict landmarks and aligned image
    aligned_x, orig_ldmks, aligned_ldmks, score, thetas, bbox = aligner(input)

    # Documentation
    # aligned_x: aligned face image (1, 3, 112, 112)
    # orig_ldmks: predicted landmarks in the original image (1, 5, 2)
    # aligned_ldmks: predicted landmarks in the aligned image (1, 5, 2)
    # score: confidence score (1,)
    # thetas: affine transformation matrix (1, 2, 3). See below for how to use it.
    # bbox: normalized bounding box in the original image (1, 4)

    # differentiable alignment
    import torch.nn.functional as F
    grid = F.affine_grid(thetas, (1, 3, 112, 112), align_corners=True)
    manual_aligned_x = F.grid_sample(input, grid, align_corners=True)
    # manual_aligned_x should be same as aligned_x (up to some numerical error due to interpolation error)
    # here input can receive gradient through the grid_sample function.

Example Outputs

Image 1 Image 2 Image 3
Input Image Input Image with Landmark Aligned Image with Landmark
```

Code for visualization

def concat_pil(list_of_pil):
    """Paste the given PIL images side by side onto one new RGB image.

    The size of the first image sets the tile width and the canvas height;
    image ``i`` is pasted with its left edge at ``i * width``.

    Args:
        list_of_pil: Non-empty sequence of PIL images.

    Returns:
        A new RGB image as wide as all tiles together.
    """
    tile_w, tile_h = list_of_pil[0].size
    canvas = Image.new('RGB', (tile_w * len(list_of_pil), tile_h))
    x_offset = 0
    for tile in list_of_pil:
        canvas.paste(tile, (x_offset, 0))
        x_offset += tile_w
    return canvas


def draw_ldmk(img, ldmk):
    """Draw five landmark points onto a copy of ``img``.

    Args:
        img: Image array; ``img.shape[0]`` is used as the height and
            ``img.shape[1]`` as the width.
        ldmk: Flat sequence ``x0, y0, x1, y1, ...`` of coordinates that are
            scaled by the image width/height before drawing, or ``None``.

    Returns:
        ``img`` itself when ``ldmk`` is ``None``; otherwise a copy of
        ``img`` with one coloured circle per landmark.
    """
    if ldmk is None:
        return img
    # Imported only once there is something to draw, so the no-op path
    # above does not require OpenCV to be installed.
    import cv2
    colors = [(0, 255, 0), (255, 0, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255), (255, 0, 255)]
    img = img.copy()
    for i in range(5):
        # Scale the (x, y) pair of landmark i to pixel coordinates.
        center = (int(ldmk[i*2] * img.shape[1]),
                  int(ldmk[i*2+1] * img.shape[0]))
        cv2.circle(img, center, 1, colors[i], 4)
    return img

def tensor_to_numpy(tensor):
    """Convert a channel-first tensor in [-1, 1] to a channel-last array in [0, 255].

    Args:
        tensor: Three-dimensional tensor whose first axis is moved to the end.

    Returns:
        A NumPy array holding ``(value * 0.5 + 0.5) * 255`` for every element
        (floating point, not yet cast to ``uint8``).
    """
    # detach() + cpu(): .numpy() raises on tensors that require grad (e.g.
    # model outputs produced outside torch.no_grad()) or that live on an
    # accelerator; both calls are no-ops for plain CPU tensors.
    arr = tensor.detach().cpu().numpy().transpose(1, 2, 0)
    # -1 to 1 tensor to 0-255
    return (arr * 0.5 + 0.5) * 255


def visualize(tensor, ldmks=None):
    """Render a batch of images (optionally with landmarks) as one PIL strip.

    Args:
        tensor: Four-dimensional batch; each item is converted with
            ``tensor_to_numpy``.
        ldmks: Optional per-image landmarks; entry ``j`` is flattened and
            drawn onto image ``j``.

    Returns:
        The images concatenated horizontally by ``concat_pil``.
    """
    assert tensor.ndim == 4
    frames = []
    for idx, image_tensor in enumerate(tensor):
        frame = tensor_to_numpy(image_tensor)
        if ldmks is not None:
            frame = draw_ldmk(frame, ldmks[idx].ravel())
        frames.append(Image.fromarray(frame.astype('uint8')))
    return concat_pil(frames)

# Save three views: the raw input, the aligned face with its landmarks,
# and the input with its predicted landmarks.
visualize(input, None).save('orig.png')
# The aligner's first output is named aligned_x (there is no variable `aligned`).
visualize(aligned_x, aligned_ldmks).save('aligned.png')
visualize(input, orig_ldmks).save('input.png')
Downloads last month
56
Safetensors
Model size
27.4M params
Tensor type
F32
·
Inference Providers NEW
This model is not currently available via any of the supported Inference Providers.
The model cannot be deployed to the HF Inference API: The HF Inference API does not support models that require custom code execution.