Spaces:

Rasleen
/

attendance

Sleeping

App Files Files Community

Kaushik066 committed on Apr 7

Commit

151b2c7

verified ·

1 Parent(s): fc176df

change to insightFace model

Browse files

Files changed (1) hide show

app.py +164 -125

app.py CHANGED Viewed

@@ -6,6 +6,8 @@ from transformers import ViTImageProcessor
 # For Model
 from transformers import ViTModel, ViTConfig, pipeline
 # For data augmentation
 from torchvision import transforms, datasets
@@ -25,6 +27,7 @@ from torch.utils.data import Dataset, DataLoader
 # Other Generic Libraries
 import torch
 from PIL import Image
 import os
 import streamlit as st
 import gc
@@ -48,134 +51,147 @@ data_path = 'employees'
 model_path = 'vit_pytorch_GPU_1.pt'
 webcam_path = 'captured_image.jpg'
 # Set Title
 st.title("Employee Attendance System")
-#pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
 # Define Image Processor
-image_processor_prod = ViTImageProcessor.from_pretrained(MODEL_TRANSFORMER, attn_implementation="sdpa", torch_dtype=torch.float16)
 # Define ML Model
-class FaceEmbeddingModel(torch.nn.Module):
-    def __init__(self, model_name, embedding_size):
-        super(FaceEmbeddingModel, self).__init__()
-        self.config = ViTConfig.from_pretrained(model_name, id2label=idx_to_label, label2id=label_to_idx, return_dict=True)
-        self.backbone = ViTModel.from_pretrained(model_name, config=self.config)  # Load ViT model
-        self.fc = torch.nn.Linear(self.backbone.config.hidden_size, embedding_size) # Convert to 512D feature vector
-    def forward(self, images):
-        x = self.backbone(images).last_hidden_state[:, 0]  # Extract embeddings
-        x = self.fc(x)  # Convert to 512D embedding
-        return torch.nn.functional.normalize(x)  # Normalize for cosine similarity
 # Load the model
-model_pretrained = torch.load(model_path, map_location=device, weights_only=False)
 # Define the ML model - Evaluation function
-def prod_function(transformer_model, prod_dl, webcam_dl):
-    # Initialize accelerator
-    accelerator = Accelerator()
-    # to INFO for the main process only.
-    if accelerator.is_main_process:
-        datasets.utils.logging.set_verbosity_warning()
-        transformers.utils.logging.set_verbosity_info()
-    else:
-        datasets.utils.logging.set_verbosity_error()
-        transformers.utils.logging.set_verbosity_error()
-    # The seed need to be set before we instantiate the model, as it will determine the random head.
-    set_seed(42)
-    # There is no specific order to remember, we just need to unpack the objects in the same order we gave them to the prepare method.
-    accelerated_model, acclerated_prod_dl, acclerated_webcam_dl = accelerator.prepare(transformer_model, prod_dl, webcam_dl)
-    # Evaluate at the end of the epoch
-    accelerated_model.eval()
-    # Find Embedding of the image to be evaluated
-    for batch in acclerated_webcam_dl:
-        with torch.no_grad():
-            #img_prod = acclerated_prod_data['pixel_values']
-            emb_prod = accelerated_model(batch['pixel_values'])
-    prod_preds = []
-    for batch in acclerated_prod_dl:
-        #img = batch['pixel_values']
-        with torch.no_grad():
-            emb = accelerated_model(batch['pixel_values'])
-        distance = F.pairwise_distance(emb, emb_prod)
-        prod_preds.append(distance)
-    return prod_preds
 # Creation of Dataloader
-class CustomDatasetProd(Dataset):
-    def __init__(self, pixel_values):
-        self.pixel_values = pixel_values
-    def __len__(self):
-        return len(self.pixel_values)
-    def __getitem__(self, idx):
-        item = {
-            'pixel_values': self.pixel_values[idx].squeeze(0),
-        }
-        return item
 # Creation of Dataset
-class CreateDatasetProd():
-    def __init__(self, image_processor):
-        super().__init__()
-        self.image_processor = image_processor
-        # Define a transformation pipeline
-        self.transform_prod = transforms.v2.Compose([
-                                    transforms.v2.ToImage(),
-                                    transforms.v2.ToDtype(torch.uint8, scale=False)
-                                ])
-    def get_pixels(self, img_paths):
-        pixel_values = []
-        for path in img_paths:
-            # Read and process Images
-            img = Image.open(path)
-            img = self.transform_prod(img)
-            # Scaling the video to ML model's desired format
-            img = self.image_processor(img, return_tensors='pt') #, input_data_format='channels_first')
-            pixel_values.append(img['pixel_values'].squeeze(0))
-            # Force garbage collection
-            del img
-            gc.collect()
-        return pixel_values
-    def get_pixel(self, img_path):
-        # Read and process Images
-        img = Image.open(img_path)
-        img = self.transform_prod(img)
-        # Scaling the video to ML model's desired format
-        img = self.image_processor(img, return_tensors='pt') #, input_data_format='channels_first')
-        pixel_values = img['pixel_values'] #.squeeze(0)
-        # Force garbage collection
-        del img
-        gc.collect()
-        return pixel_values
-    def create_dataset(self, image_paths, webcam=False):
-        if webcam == True:
-            pixel_values = self.get_pixel(image_paths)
-        else:
-            pixel_values = torch.stack(self.get_pixels(image_paths))
-        return CustomDatasetProd(pixel_values=pixel_values)
 # Read images from directory
 image_paths = []
 image_file = glob(os.path.join(data_path, '*.jpg'))
@@ -184,15 +200,38 @@ image_paths.extend(image_file)
 #st.write('input path size:', len(image_paths))
 #st.write(image_paths)
 # Create DataLoader for Employees image
-dataset_prod_obj = CreateDatasetProd(image_processor_prod)
-prod_ds = dataset_prod_obj.create_dataset(image_paths, webcam=False)
-prod_dl = DataLoader(prod_ds, batch_size=BATCH_SIZE)
 ## Testing the dataloader
 #prod_inputs = next(iter(prod_dl))
 #st.write(prod_inputs['pixel_values'].shape)
 about_tab, app_tab = st.tabs(["About the app", "Face Recognition"])
 # About the app Tab
 with about_tab:
@@ -231,8 +270,8 @@ with app_tab:
         #st.write('Image saved as:',webcam_path)
         ## Create DataLoader for Webcam Image
-        webcam_ds = dataset_prod_obj.create_dataset(picture, webcam=True)
-        webcam_dl = DataLoader(webcam_ds, batch_size=BATCH_SIZE)
         ## Testing the dataloader
         #prod_inputs = next(iter(webcam_dl))
@@ -240,14 +279,14 @@ with app_tab:
         with st.spinner("Wait for it...", show_time=True):
             # Run the predictions
-            prediction = prod_function(model_pretrained, prod_dl, webcam_dl)
-            predictions = torch.cat(prediction, 0).to(device)
-            match_idx = torch.argmin(predictions)
             st.write(predictions)
             st.write(image_paths)
             # Display the results
-            if predictions[match_idx] <= 0.3:
               st.write('Welcome: ',image_paths[match_idx].split('/')[-1].split('.')[0])
             else:
               st.write("Match not found")

 # For Model
 from transformers import ViTModel, ViTConfig, pipeline
+import insightface
+from insightface.app import FaceAnalysis
 # For data augmentation
 from torchvision import transforms, datasets
 # Other Generic Libraries
 import torch
 from PIL import Image
+import cv2
 import os
 import streamlit as st
 import gc
 model_path = 'vit_pytorch_GPU_1.pt'
 webcam_path = 'captured_image.jpg'
+IMAGE_SHAPE = 640
 # Set Title
 st.title("Employee Attendance System")
 # Define Image Processor
+#image_processor_prod = ViTImageProcessor.from_pretrained(MODEL_TRANSFORMER, attn_implementation="sdpa", torch_dtype=torch.float16)
 # Define ML Model
+#class FaceEmbeddingModel(torch.nn.Module):
+#    def __init__(self, model_name, embedding_size):
+#        super(FaceEmbeddingModel, self).__init__()
+#        self.config = ViTConfig.from_pretrained(model_name, id2label=idx_to_label, label2id=label_to_idx, return_dict=True)
+#        self.backbone = ViTModel.from_pretrained(model_name, config=self.config)  # Load ViT model
+#        self.fc = torch.nn.Linear(self.backbone.config.hidden_size, embedding_size) # Convert to 512D feature vector
+#
+#    def forward(self, images):
+#        x = self.backbone(images).last_hidden_state[:, 0]  # Extract embeddings
+#        x = self.fc(x)  # Convert to 512D embedding
+#        return torch.nn.functional.normalize(x)  # Normalize for cosine similarity
 # Load the model
+#model_pretrained = torch.load(model_path, map_location=device, weights_only=False)
 # Define the ML model - Evaluation function
+#def prod_function(transformer_model, prod_dl, webcam_dl):
+#    # Initialize accelerator
+#    accelerator = Accelerator()
+#
+#    # to INFO for the main process only.
+#    #if accelerator.is_main_process:
+#    #    datasets.utils.logging.set_verbosity_warning()
+#    #    transformers.utils.logging.set_verbosity_info()
+#    #else:
+#    #    datasets.utils.logging.set_verbosity_error()
+#    #    transformers.utils.logging.set_verbosity_error()
+#
+#    # The seed need to be set before we instantiate the model, as it will determine the random head.
+#    set_seed(42)
+#
+#    # There is no specific order to remember, we just need to unpack the objects in the same order we gave them to the prepare method.
+#    accelerated_model, acclerated_prod_dl, acclerated_webcam_dl = accelerator.prepare(transformer_model, prod_dl, webcam_dl)
+#
+#    # Evaluate at the end of the epoch
+#    accelerated_model.eval()
+#
+#    # Find Embedding of the image to be evaluated
+#    for batch in acclerated_webcam_dl:
+#        with torch.no_grad():
+#            #img_prod = acclerated_prod_data['pixel_values']
+#            emb_prod = accelerated_model(batch['pixel_values'])
+#
+#    prod_preds = []
+#
+#    for batch in acclerated_prod_dl:
+#        #img = batch['pixel_values']
+#        with torch.no_grad():
+#            emb = accelerated_model(batch['pixel_values'])
+#        distance = F.pairwise_distance(emb, emb_prod)
+#
+#        prod_preds.append(distance)
+#    return prod_preds
 # Creation of Dataloader
+#class CustomDatasetProd(Dataset):
+#    def __init__(self, image_path, webcam):
+#        self.image_path = image_path
+#        self.webcam = webcam
+#
+#    def __len__(self):
+#        return len(self.image_path)
+#
+#    def __getitem__(self, idx):
+#        if webcam == False:
+#            img = cv2.imread(image_path[idx])
+#        else:
+#            img = image_path
+#        faces = app.get(img)
+#
+#        if not faces:
+#            raise Exception("No face detected")
+#
+#        pixel_values = faces[0].embedding  # embedding is a 512-dimensional vector
+#        item = {
+#            'pixel_values': pixel_values.squeeze(0),
+#        }
+#        return item
 # Creation of Dataset
+#class CreateDatasetProd():
+#    def __init__(self, image_processor):
+#        super().__init__()
+#        self.image_processor = image_processor
+#        # Define a transformation pipeline
+#        self.transform_prod = transforms.v2.Compose([
+#                                    transforms.v2.ToImage(),
+#                                    transforms.v2.ToDtype(torch.uint8, scale=False)
+#                                ])
+#
+#    def get_pixels(self, img_paths):
+#        pixel_values = []
+#        for path in img_paths:
+#            # Read and process Images
+#            img = Image.open(path)
+#            img = self.transform_prod(img)
+#
+#            # Scaling the video to ML model's desired format
+#            img = self.image_processor(img, return_tensors='pt') #, input_data_format='channels_first')
+#
+#            pixel_values.append(img['pixel_values'].squeeze(0))
+#
+#            # Force garbage collection
+#            del img
+#            gc.collect()
+#        return pixel_values
+#
+#    def get_pixel(self, img_path):
+#        # Read and process Images
+#        img = Image.open(img_path)
+#        img = self.transform_prod(img)
+#
+#        # Scaling the video to ML model's desired format
+#        img = self.image_processor(img, return_tensors='pt') #, input_data_format='channels_first')
+#
+#        pixel_values = img['pixel_values'] #.squeeze(0)
+#
+#        # Force garbage collection
+#        del img
+#        gc.collect()
+#
+#        return pixel_values
+#
+#    def create_dataset(self, image_paths, webcam=False):
+#        if webcam == True:
+#            pixel_values = self.get_pixel(image_paths)
+#        else:
+#            pixel_values = torch.stack(self.get_pixels(image_paths))
+#
+#        return CustomDatasetProd(pixel_values=pixel_values)
 # Read images from directory
 image_paths = []
 image_file = glob(os.path.join(data_path, '*.jpg'))
 #st.write('input path size:', len(image_paths))
 #st.write(image_paths)
+# Initialize the app
+app = FaceAnalysis(name="buffalo_l")  # buffalo_l includes ArcFace model
+app.prepare(ctx_id=-1, det_size=(IMAGE_SHAPE, IMAGE_SHAPE))  # Use ctx_id=-1 if you want CPU, and ctx_id=0 for GPU
 # Create DataLoader for Employees image
+#dataset_prod_obj = CreateDatasetProd(image_processor_prod)
+#prod_ds = dataset_prod_obj.create_dataset(image_paths, webcam=False)
+#prod_dl = DataLoader(prod_ds, webcam=False, batch_size=BATCH_SIZE)
 ## Testing the dataloader
 #prod_inputs = next(iter(prod_dl))
 #st.write(prod_inputs['pixel_values'].shape)
+# Define the ML model - Evaluation function
+def prod_function(app, prod_path, webcam_path):
+    webcam_img = cv2.imread(webcam_path)
+    webcam_emb = app.get(webcam_img, max_num=1)
+    webcam_emb = webcam_emb[0].embedding
+    similarity_score = []
+    for path in prod_path:
+        img = cv2.imread(path)
+        face_embedding = app.get(img, max_num=1)
+        face_embedding = face_embedding[0].embedding
+        similarity_score.append(F.cosine_similarity(face_embedding,webcam_emb, dim=0))
+        #distance = F.pairwise_distance(emb, emb_prod)
+        #prod_preds.append(distance)
+    return similarity_score #prod_preds
 about_tab, app_tab = st.tabs(["About the app", "Face Recognition"])
 # About the app Tab
 with about_tab:
         #st.write('Image saved as:',webcam_path)
         ## Create DataLoader for Webcam Image
+        #webcam_ds = dataset_prod_obj.create_dataset(picture, webcam=True)
+        #webcam_dl = DataLoader(picture, webcam=True, batch_size=BATCH_SIZE)
         ## Testing the dataloader
         #prod_inputs = next(iter(webcam_dl))
         with st.spinner("Wait for it...", show_time=True):
             # Run the predictions
+            prediction = prod_function(app, image_paths, picture)
+            #predictions = torch.cat(prediction, 0).to(device)
+            #match_idx = torch.argmin(predictions)
             st.write(predictions)
             st.write(image_paths)
             # Display the results
+            if predictions[match_idx] >= 0.9:
               st.write('Welcome: ',image_paths[match_idx].split('/')[-1].split('.')[0])
             else:
               st.write("Match not found")