# Base Framework
import torch
import torch.nn as nn
# For data transformation
from torchvision import transforms
import torchvision.models as models
from torchvision.transforms import v2
# For image preprocessing
import transformers
from transformers import ViTImageProcessor
from transformers import set_seed
# For Data Loaders
import datasets
from torch.utils.data import Dataset, DataLoader
# For GPU
from accelerate import Accelerator, notebook_launcher
# General Libraries
import os
import PIL
from glob import glob
import pandas as pd
import numpy as np
import gradio as gr

# Set the device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device:", device)
# Constants
SEED = 42
BATCH_SIZE = 1
MODEL_TRANSFORMER = 'google/vit-base-patch16-224'
MODEL = "IMAGENET1K_V1"
CLIP_SIZE = 224
data_path = 'employees'
#model_path = 'models'
model_path_1 = 'models/Swin_tiny_celebA_imagenetWTS_loss0232.pt'
model_path_2 = 'models/Swin_tiny_celebA_imagenetWTS_loss0209.pt'
# Image processor that resizes and normalises inputs to the 224x224 ViT format
# (attn_implementation/torch_dtype are model-level kwargs with no effect on an image processor)
image_processor = ViTImageProcessor.from_pretrained(MODEL_TRANSFORMER)
# Create Dataset - Pictures
class CustomDataset(Dataset):
    def __init__(self, image_paths, image_processor):
        self.image_paths = image_paths  # Store paths instead of images
        self.image_processor = image_processor

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        # Load the image lazily and preprocess it into a pixel-value tensor
        image_path = self.image_paths[idx]
        image = PIL.Image.open(image_path).convert("RGB")
        pixel_values = self.image_processor(image, return_tensors='pt')['pixel_values'][0]
        return {'pixel_values': pixel_values}
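# A minimal sanity check (a sketch, not part of the original; the path is
# hypothetical): each item is a dict whose 'pixel_values' entry is one
# preprocessed image of shape (3, 224, 224).
#
#   ds = CustomDataset(image_paths=['employees/alice.jpg'], image_processor=image_processor)
#   print(ds[0]['pixel_values'].shape)  # torch.Size([3, 224, 224])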
# Model Class
class SwinTEmbedding(nn.Module):
    def __init__(self, model_name="DEFAULT"):
        super().__init__()
        self.model_name = model_name
        # Load the pre-trained model
        self.base_model = models.swin_t(weights=self.model_name)
        # Drop the classifier head by keeping only the feature backbone
        self.base_model_backbone = list(self.base_model.children())[0]

    def forward(self, x):
        x = self.base_model_backbone(x)    # Feature extraction
        x = torch.flatten(x, start_dim=1)  # Flatten output to 1D embeddings
        return x
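# Quick shape check (a sketch, not part of the original): for a 224x224 input,
# torchvision's swin_t backbone emits a channels-last (1, 7, 7, 768) feature
# map, so the flattened embedding is 7 * 7 * 768 = 37632-dimensional.
#
#   model = SwinTEmbedding(model_name="IMAGENET1K_V1")
#   emb = model(torch.rand(1, 3, 224, 224))
#   print(emb.shape)  # torch.Size([1, 37632])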
# Inference function
def prod_function(reconstructed_model, prod_dl, webcam_img):
    # Initialize accelerator
    accelerator = Accelerator()
    # The seed needs to be set before we instantiate the model, as it determines the random head.
    set_seed(SEED)
    # Preprocess the webcam capture into a (1, 3, 224, 224) pixel-value tensor
    webcam_img = image_processor(webcam_img, return_tensors='pt')['pixel_values'][0]
    webcam_img = torch.unsqueeze(webcam_img, 0)
    # Cosine similarity as the scoring function
    criterion = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
    # There is no specific order to remember; we just unpack the objects in the same order we gave them to prepare().
    accelerated_model, accelerated_criterion, accelerated_prod_dl, accelerated_webcam_img = accelerator.prepare(
        reconstructed_model, criterion, prod_dl, webcam_img
    )
    accelerated_model.eval()
    # Embedding of the webcam capture to be evaluated
    with torch.no_grad():
        webcam_emb = torch.flatten(accelerated_model(accelerated_webcam_img), start_dim=1)
    # Similarity of every stored image against the webcam embedding
    prod_predictions = []
    for batch in accelerated_prod_dl:
        with torch.no_grad():
            image_emb = torch.flatten(accelerated_model(batch['pixel_values']), start_dim=1)
            prod_predictions.append(accelerated_criterion(image_emb, webcam_emb))
    return prod_predictions
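# Note (follows from BATCH_SIZE = 1 above): prod_predictions is a list with one
# cosine-similarity tensor of shape (1,) per stored image; concatenating them
# gives a single (num_images,) score vector with values in [-1, 1]:
#
#   scores = torch.cat(prod_predictions, 0)  # higher = more similar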
# Function to find the similarity score
def face_recognition(webcam_img):
    # Read the stored employee images from the data directory
    image_paths = glob(os.path.join(data_path, '*.jpg'))
    # Wrap the images in a DataLoader
    prod_ds = CustomDataset(image_paths=image_paths, image_processor=image_processor)
    prod_dl = DataLoader(prod_ds, batch_size=BATCH_SIZE)
    # Load the fine-tuned model weights
    recon_model = SwinTEmbedding(model_name=MODEL)
    recon_model.load_state_dict(torch.load(model_path_2, weights_only=True, map_location=torch.device('cpu')))
    # Score the webcam capture against every stored image
    prediction = prod_function(recon_model, prod_dl, webcam_img)
    prediction = torch.cat(prediction, 0)
    # Map each image path to its similarity score
    similarity_score = dict(zip(image_paths, prediction.tolist()))
    # Identify the person as the best-matching image's file stem
    idx = prediction.argmax(-1).item()
    person_name = os.path.splitext(os.path.basename(image_paths[idx]))[0]
    return person_name, similarity_score
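# Example usage outside Gradio (a sketch, not part of the original; 'test.jpg'
# is a hypothetical file):
#
#   img = PIL.Image.open('test.jpg').convert("RGB")
#   name, scores = face_recognition(img)
#   print(name)    # file stem of the best-matching employee photo
#   print(scores)  # {image_path: cosine similarity, ...}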
# Function to read the about.md file
def load_about_md():
    with open("about.md", "r") as file:
        about_content = file.read()
    return about_content
with gr.Blocks() as demo:
    gr.Markdown("# Face Recognition app 2.0")
    # About the App
    with gr.Tab("About the App"):
        gr.Markdown(load_about_md())
    # Face recognition Tab
    with gr.Tab("Face recognition"):
        with gr.Row():
            with gr.Column(scale=0.9, variant="panel"):
                with gr.Row(height=350, variant="panel"):
                    # Webcam input for image capture
                    webcam_input = gr.Image(sources=["webcam"], type="pil", label="Face Capture")
                with gr.Row(variant="panel"):
                    # Submit the captured image
                    image_button = gr.Button("Submit")
                    # Display the recognised person's name
                    recognition_output = gr.Textbox(label="Face Recognised as:")
            with gr.Column(scale=1, variant="panel"):
                with gr.Row():
                    # Display the per-image similarity scores
                    similarity_score_output = gr.Textbox(label="Face Similarity Score:")
    # Wire the button to the recognition function
    image_button.click(face_recognition, inputs=webcam_input, outputs=[recognition_output, similarity_score_output])

if __name__ == "__main__":
    demo.launch()