CVLFace Pretrained Face Alignment Model (DFA RESNET50)
🌎 GitHub • 🤗 Hugging Face
1. Introduction
Model Name: DFA RESNET50
Related Paper: KeyPoint Relative Position Encoding for Face Recognition (https://arxiv.org/abs/2403.14852)
Please cite the original paper and follow the license of the training dataset.
2. Quick Start
if __name__ == '__main__':
from transformers import AutoModel
from huggingface_hub import hf_hub_download
import shutil
import os
import sys
# helper function to download the files of a Hugging Face repo into a local directory
def download(repo_id, path, HF_TOKEN=None):
    """Fetch the files of a Hugging Face repo into ``path``, skipping files already there.

    The repo is expected to ship a ``files.txt`` manifest with one file name
    per line. Every non-empty manifest entry, plus ``config.json``,
    ``wrapper.py`` and ``model.safetensors``, is downloaded unless a file of
    that name already exists under ``path``.

    Args:
        repo_id: Hugging Face repository id to download from.
        path: Local directory to download into; created if missing.
        HF_TOKEN: Optional access token forwarded to ``hf_hub_download``.
    """
    os.makedirs(path, exist_ok=True)

    def fetch(filename):
        # Single place that talks to the hub, so every download uses the same options.
        hf_hub_download(repo_id, filename, token=HF_TOKEN, local_dir=path, local_dir_use_symlinks=False)

    manifest = os.path.join(path, 'files.txt')
    if not os.path.exists(manifest):
        fetch('files.txt')

    with open(manifest, 'r') as handle:
        listed = handle.read().split('\n')

    # Blank manifest lines are dropped; the three core model files are always required.
    wanted = list(filter(None, listed)) + ['config.json', 'wrapper.py', 'model.safetensors']
    for name in wanted:
        if os.path.exists(os.path.join(path, name)):
            continue
        fetch(name)
# helper function to load the model from a local copy of the repo
def load_model_from_local_path(path, HF_TOKEN=None):
    """Load the model stored in the local directory ``path`` via ``AutoModel``.

    While loading, the working directory is switched to ``path`` and ``path``
    is put at the front of ``sys.path`` (presumably so the repo's custom code,
    enabled by ``trust_remote_code=True``, can resolve its own modules and
    files -- confirm against ``wrapper.py``). Both changes are undone before
    returning, including when loading raises.

    Args:
        path: Directory containing the downloaded repo files.
        HF_TOKEN: Optional access token forwarded to ``from_pretrained``.

    Returns:
        The model returned by ``AutoModel.from_pretrained``.
    """
    cwd = os.getcwd()
    os.chdir(path)
    sys.path.insert(0, path)
    try:
        return AutoModel.from_pretrained(path, trust_remote_code=True, token=HF_TOKEN)
    finally:
        # Always restore process-wide state, even if loading failed.
        os.chdir(cwd)
        # Remove by value: loading may have inserted other entries ahead of ours,
        # in which case a blind pop(0) would drop the wrong one.
        try:
            sys.path.remove(path)
        except ValueError:
            pass
# helper function to download a Hugging Face repo (if needed) and load the model from it
def load_model_by_repo_id(repo_id, save_path, HF_TOKEN=None, force_download=False):
    """Download ``repo_id`` into ``save_path`` and return the loaded model.

    Args:
        repo_id: Hugging Face repository id.
        save_path: Local directory used as the download target and load source.
        HF_TOKEN: Optional access token passed to the download and load steps.
        force_download: When true, an existing ``save_path`` is deleted first
            so every file is fetched again.

    Returns:
        The model produced by ``load_model_from_local_path``.
    """
    # Wipe the local copy only when a fresh download was requested and one exists.
    if force_download and os.path.exists(save_path):
        shutil.rmtree(save_path)
    download(repo_id, save_path, HF_TOKEN)
    model = load_model_from_local_path(save_path, HF_TOKEN)
    return model
if __name__ == '__main__':
    # load model
    # Replace the placeholder below with your own Hugging Face access token.
    HF_TOKEN = 'YOUR_HUGGINGFACE_TOKEN'
    path = os.path.expanduser('~/.cvlface_cache/minchul/cvlface_DFA_resnet50')
    repo_id = 'minchul/cvlface_DFA_resnet50'
    aligner = load_model_by_repo_id(repo_id, path, HF_TOKEN)

    # input is an RGB image normalized to [-1, 1].
    from torchvision.transforms import Compose, ToTensor, Normalize
    from PIL import Image
    # convert('RGB') guarantees three channels: RGBA, grayscale or palette PNGs
    # would otherwise not match the 3-channel mean/std given to Normalize.
    img = Image.open('/path/to/img.png').convert('RGB')
    trans = Compose([ToTensor(), Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])
    input = trans(img).unsqueeze(0)  # torch.randn(1, 3, 256, 256) or any size with a single face

    # predict landmarks and aligned image
    aligned_x, orig_ldmks, aligned_ldmks, score, thetas, bbox = aligner(input)

    # Documentation
    # aligned_x: aligned face image (1, 3, 112, 112)
    # orig_ldmks: predicted landmarks in the original image (1, 5, 2)
    # aligned_ldmks: predicted landmarks in the aligned image (1, 5, 2)
    # score: confidence score (1,)
    # thetas: affine transformation matrices (1, 2, 3). See below for how to use them.
    # bbox: normalized bounding box in the original image (1, 4)

    # differentiable alignment
    import torch.nn.functional as F
    grid = F.affine_grid(thetas, (1, 3, 112, 112), align_corners=True)
    manual_aligned_x = F.grid_sample(input, grid, align_corners=True)
    # manual_aligned_x should be the same as aligned_x (up to some numerical error due to interpolation)
    # here input can receive gradient through the grid_sample function.
Example Outputs
| ![]() | ![]() | ![]() |
|:---:|:---:|:---:|
| Input Image | Input Image with Landmark | Aligned Image with Landmark |
Code for visualization
def concat_pil(list_of_pil):
    """Paste PIL images side by side, left to right, onto one new RGB image.

    The tile size is taken from the first image; each image is pasted at a
    horizontal offset that is a multiple of that width.

    Args:
        list_of_pil: Non-empty sequence of PIL images.

    Returns:
        A new RGB image as wide as ``len(list_of_pil)`` tiles and one tile high.
    """
    tile_w, tile_h = list_of_pil[0].size
    canvas = Image.new('RGB', (tile_w * len(list_of_pil), tile_h))
    x_offset = 0
    for tile in list_of_pil:
        canvas.paste(tile, (x_offset, 0))
        x_offset += tile_w
    return canvas
def draw_ldmk(img, ldmk):
    """Draw five landmark points on a copy of ``img``.

    Args:
        img: Image array indexed as (height, width, ...); it is not modified.
        ldmk: Flat sequence ``x0, y0, x1, y1, ...`` of at least ten values, or
            ``None``. Each coordinate is multiplied by the image width (x) or
            height (y), so values are expected as fractions of the image size.

    Returns:
        ``img`` itself when ``ldmk`` is ``None``; otherwise a copy with one
        coloured dot per landmark.
    """
    if ldmk is None:
        return img
    # Imported only once there is something to draw, so the no-op path above
    # does not require OpenCV to be installed.
    import cv2
    colors = [(0, 255, 0), (255, 0, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255), (255, 0, 255)]
    img = img.copy()
    height, width = img.shape[0], img.shape[1]
    for i in range(5):
        # Scale the fractional coordinates to pixel positions.
        x = int(ldmk[i * 2] * width)
        y = int(ldmk[i * 2 + 1] * height)
        cv2.circle(img, (x, y), 1, colors[i], 4)
    return img
def tensor_to_numpy(tensor):
    """Convert a (C, H, W) tensor in [-1, 1] to an (H, W, C) array in [0, 255].

    Args:
        tensor: Three-dimensional tensor, channels first, values in [-1, 1].

    Returns:
        A floating-point NumPy array, channels last, scaled to 0-255. It is
        not cast to an integer type here.
    """
    # detach() and cpu() make this work for tensors that carry autograd history
    # or live on another device; a bare .numpy() raises for those.
    arr = tensor.detach().cpu().numpy().transpose(1, 2, 0)
    # Map -1..1 to 0..255.
    return (arr * 0.5 + 0.5) * 255
def visualize(tensor, ldmks=None):
    """Render a batch of image tensors, optionally with landmarks, as one PIL image.

    Args:
        tensor: Four-dimensional batch of images; each item is converted with
            ``tensor_to_numpy``.
        ldmks: Optional per-image landmarks. Entry ``j`` is flattened and
            drawn onto image ``j`` with ``draw_ldmk``.

    Returns:
        The images of the batch concatenated horizontally by ``concat_pil``.
    """
    assert tensor.ndim == 4
    frames = []
    for image_tensor in tensor:
        frames.append(tensor_to_numpy(image_tensor))
    if ldmks is not None:
        frames = [draw_ldmk(frame, ldmks[idx].ravel()) for idx, frame in enumerate(frames)]
    # Cast to 8-bit before handing the arrays to PIL.
    as_pil = [Image.fromarray(frame.astype('uint8')) for frame in frames]
    return concat_pil(as_pil)
# Save the three example images. The aligned image returned by the model is
# named `aligned_x`. Model outputs are detached and moved to the CPU first, in
# case they carry autograd history or live on another device.
visualize(input, None).save('orig.png')
visualize(aligned_x.detach().cpu(), aligned_ldmks.detach().cpu()).save('aligned.png')
visualize(input, orig_ldmks.detach().cpu()).save('input.png')
- Downloads last month
- 56
Inference Providers
NEW
This model is not currently available via any of the supported Inference Providers.
The model cannot be deployed to the HF Inference API:
The HF Inference API does not support models that require custom code execution.