Spaces:

louiecerv
/

asl_model_uploader

Sleeping

App Files Files Community

louiecerv committed on Feb 11

Commit

1cc1116

1 Parent(s): 37c7e43

save changes

Browse files

Files changed (6) hide show

__pycache__/utils.cpython-313.pyc +0 -0
app.py +80 -128
backup.py +204 -0
model_repo +1 -0
requirements.txt +3 -2
utils.py +53 -0

__pycache__/utils.cpython-313.pyc ADDED Viewed

Binary file (3.24 kB). View file

app.py CHANGED Viewed

@@ -7,113 +7,72 @@ from torch.utils.data import DataLoader
 from datasets import load_dataset
 from huggingface_hub import HfApi, Repository
 import os
 # Hugging Face Hub credentials
 HF_TOKEN = os.getenv("HF_TOKEN")
-MODEL_REPO_ID = "louiecerv/amer_sign_lang_data_augmentation"
-DATASET_REPO_ID = "louiecerv/american_sign_language"
 # Device configuration
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# Define the CNN model
-class CNN(nn.Module):
-    def __init__(self):
-        super(CNN, self).__init__()
-        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
-        self.relu1 = nn.ReLU()
-        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
-        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
-        self.relu2 = nn.ReLU()
-        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
-        self.flatten = nn.Flatten()
-        self.fc = nn.Linear(64 * 7 * 7, 128)  # Adjusted for 28x28 images
-        self.relu3 = nn.ReLU()
-        self.fc2 = nn.Linear(128, 25)  # 25 classes (A-Y)
     def forward(self, x):
-        x = self.pool1(self.relu1(self.conv1(x)))
-        x = self.pool2(self.relu2(self.conv2(x)))
-        x = self.flatten(x)
-        x = self.relu3(self.fc(x))
-        x = self.fc2(x)
-        return x
-# Create a model card
-def create_model_card():
-    model_card = """
-    ---
-    language: en
-    tags:
-    - image-classification
-    - deep-learning
-    - cnn
-    license: apache-2.0
-    datasets:
- Network (CNN) designed to recognize American Sign Language (ASL) letters from images. It was trained on the `louiecerv/american_sign_language` dataset.
-    ## Model Description
-    The model consists of two convolutional layers followed by max-pooling layers, a flattening layer, and two fully connected layers. It is designed to classify images of ASL letters into 25 classes (A-Y).
-    ## Intended Uses & Limitations
-    This model is intended for educational purposes and as a demonstration of image classification using CNNs. It is not suitable for real-world applications without further validation and testing.
-    ## How to Use
-    ```python
-    import torch
-    from torchvision import transforms
-    from PIL import Image
-    # Load the model
-    model = CNN()
-    model.load_state_dict(torch.load("path_to_model/pytorch_model.bin"))
-    model.eval()
-    # Preprocess the image
-    transform = transforms.Compose([
-        transforms.Grayscale(num_output_channels=1),
-        transforms.Resize((28, 28)),
-        transforms.ToTensor(),
-        transforms.Normalize(mean=[0.5], std=[0.5])
-    ])
-    image = Image.open("path_to_image").convert("RGB")
-    image = transform(image).unsqueeze(0)
-    # Make a prediction
-    with torch.no_grad():
-        output = model(image)
-        _, predicted = torch.max(output.data, 1)
-    print(f"Predicted ASL letter: {predicted.item()}")
-    ```
-    ## Training Data
-    The model was trained on the `louiecerv/american_sign_language` dataset, which contains images of ASL letters.
-    ## Training Procedure
-    The model was trained using the Adam optimizer with a learning rate of 0.001 and a batch size of 64. The training process included 5 epochs.
-    ## Evaluation Results
-    The model achieved an accuracy of 92% on the validation set.
-    """
-    with open("model_repo/README.md", "w") as f:
-        f.write(model_card)
 # Streamlit app
 def main():
     st.title("American Sign Language Recognition")
     # Load the dataset from Hugging Face Hub
     dataset = load_dataset(DATASET_REPO_ID)
-    # Data loaders with preprocessing:
-    transform = transforms.Compose([
-        transforms.Normalize(mean=[0.5], std=[0.5])  # Adjust mean and std if needed
     ])
     def collate_fn(batch):
@@ -121,18 +80,18 @@ def main():
         labels = []
         for item in batch:
             if 'pixel_values' in item and 'label' in item:
-                image = torch.tensor(item['pixel_values'])  # Convert to tensor
                 label = item['label']
                 try:
-                    image = transform(image)
                     images.append(image)
                     labels.append(label)
                 except Exception as e:
                     print(f"Error processing image: {e}")
-                    continue  # Skip to the next image
-        if not images:  # Check if the list is empty!
-            return torch.tensor([]), torch.tensor([])  # Return empty tensors if no images loaded
         images = torch.stack(images).to(device)
         labels = torch.tensor(labels).long().to(device)
@@ -142,59 +101,52 @@ def main():
     val_loader = DataLoader(dataset["validation"], batch_size=64, collate_fn=collate_fn)
     # Model, loss, and optimizer
-    model = CNN().to(device)
     criterion = nn.CrossEntropyLoss()
     optimizer = optim.Adam(model.parameters(), lr=0.001)
-    # Training loop
-    num_epochs = st.slider("Number of Epochs", 1, 20, 5)  # Streamlit slider
-    if st.button("Train Model"):
         for epoch in range(num_epochs):
             for i, (images, labels) in enumerate(train_loader):
-                if images.nelement() == 0:  # Check if images tensor is empty
                     continue
                 # Forward pass
                 outputs = model(images)
                 loss = criterion(outputs, labels)
                 # Backward and optimize
                 optimizer.zero_grad()
                 loss.backward()
                 optimizer.step()
-                if (i + 1) % 100 == 0:
-                    st.write(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}')
-        # Validation
-        correct = 0
-        total = 0
-        with torch.no_grad():
-            for images, labels in val_loader:
-                if images.nelement() == 0:  # Check if images tensor is empty
-                    continue
-                outputs = model(images)
                 _, predicted = torch.max(outputs.data, 1)
                 total += labels.size(0)
                 correct += (predicted == labels).sum().item()
-        if total > 0:
-            accuracy = 100 * correct / total
-            st.write(f'Accuracy of the model on the validation images: {accuracy:.2f}%')
-        else:
-            st.write("No validation images were processed.")
-        # Save model to Hugging Face Hub
-        if HF_TOKEN:
-            repo = Repository(local_dir="model_repo", clone_from=MODEL_REPO_ID, use_auth_token=HF_TOKEN)
-            model_path = os.path.join(repo.local_dir, "pytorch_model.bin")
-            torch.save(model.state_dict(), model_path)
-            create_model_card()
-            repo.push_to_hub(commit_message="Trained model and model card", blocking=True)
-            st.write(f"Model and model card saved to {MODEL_REPO_ID}")
-        else:
-            st.warning("HF_TOKEN environment variable not set. Model not saved.")
 if __name__ == "__main__":
-    main()

 from datasets import load_dataset
 from huggingface_hub import HfApi, Repository
 import os
+import matplotlib.pyplot as plt
+import utils
 # Hugging Face Hub credentials
 HF_TOKEN = os.getenv("HF_TOKEN")
+MODEL_REPO_ID = "louiecerv/amer_sign_lang_data_augmentation"
+DATASET_REPO_ID = "louiecerv/american_sign_language"
 # Device configuration
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+st.write(f"Device: {device}")
+# Define the new CNN model
+IMG_HEIGHT = 28
+IMG_WIDTH = 28
+IMG_CHS = 1
+N_CLASSES = 24
+class MyConvBlock(nn.Module):
+    def __init__(self, in_ch, out_ch, dropout_p):
+        kernel_size = 3
+        super().__init__()
+        self.model = nn.Sequential(
+            nn.Conv2d(in_ch, out_ch, kernel_size, stride=1, padding=1),
+            nn.BatchNorm2d(out_ch),
+            nn.ReLU(),
+            nn.Dropout(dropout_p),
+            nn.MaxPool2d(2, stride=2)
+        )
     def forward(self, x):
+        return self.model(x)
+flattened_img_size = 75 * 3 * 3
+# Input 1 x 28 x 28
+base_model = nn.Sequential(
+    MyConvBlock(IMG_CHS, 25, 0), # 25 x 14 x 14
+    MyConvBlock(25, 50, 0.2), # 50 x 7 x 7
+    MyConvBlock(50, 75, 0), # 75 x 3 x 3
+    nn.Flatten(),
+    nn.Linear(flattened_img_size, 512),
+    nn.Dropout(.3),
+    nn.ReLU(),
+    nn.Linear(512, N_CLASSES)
+)
 # Streamlit app
 def main():
     st.title("American Sign Language Recognition")
+    # Move slider and button to sidebar
+    num_epochs = st.sidebar.slider("Number of Epochs", 1, 20, 5)
+    train_button = st.sidebar.button("Train Model")
     # Load the dataset from Hugging Face Hub
     dataset = load_dataset(DATASET_REPO_ID)
+    # Data loaders with preprocessing and data augmentation:
+    random_transforms = transforms.Compose([
+        transforms.RandomRotation(5),
+        transforms.RandomResizedCrop((IMG_WIDTH, IMG_HEIGHT), scale=(.9, 1), ratio=(1, 1)),
+        transforms.RandomHorizontalFlip(),
+        transforms.ColorJitter(brightness=.2, contrast=.5),
+        transforms.Normalize(mean=[0.5], std=[0.5])
     ])
     def collate_fn(batch):
         labels = []
         for item in batch:
             if 'pixel_values' in item and 'label' in item:
+                image = torch.tensor(item['pixel_values'])
                 label = item['label']
                 try:
+                    image = random_transforms(image)
                     images.append(image)
                     labels.append(label)
                 except Exception as e:
                     print(f"Error processing image: {e}")
+                    continue
+        if not images:
+            return torch.tensor([]), torch.tensor([])
         images = torch.stack(images).to(device)
         labels = torch.tensor(labels).long().to(device)
     val_loader = DataLoader(dataset["validation"], batch_size=64, collate_fn=collate_fn)
     # Model, loss, and optimizer
+    model = base_model.to(device)
     criterion = nn.CrossEntropyLoss()
     optimizer = optim.Adam(model.parameters(), lr=0.001)
+    loss_history = []
+    accuracy_history = []
+    if train_button:
         for epoch in range(num_epochs):
+            total = 0
+            correct = 0
+            epoch_loss = 0
             for i, (images, labels) in enumerate(train_loader):
+                if images.nelement() == 0:
                     continue
                 # Forward pass
                 outputs = model(images)
                 loss = criterion(outputs, labels)
+                epoch_loss += loss.item()
                 # Backward and optimize
                 optimizer.zero_grad()
                 loss.backward()
                 optimizer.step()
                 _, predicted = torch.max(outputs.data, 1)
                 total += labels.size(0)
                 correct += (predicted == labels).sum().item()
+            epoch_accuracy = 100 * correct / total
+            loss_history.append(epoch_loss / len(train_loader))
+            accuracy_history.append(epoch_accuracy)
+            st.write(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss / len(train_loader):.4f}, Accuracy: {epoch_accuracy:.2f}%')
+        # Plot loss and accuracy
+        fig, ax1 = plt.subplots()
+        ax2 = ax1.twinx()
+        ax1.plot(loss_history, 'g-', label='Loss')
+        ax2.plot(accuracy_history, 'b-', label='Accuracy')
+        ax1.set_xlabel('Epoch')
+        ax1.set_ylabel('Loss', color='g')
+        ax2.set_ylabel('Accuracy (%)', color='b')
+        plt.title('Training Loss and Accuracy')
+        st.pyplot(fig)
 if __name__ == "__main__":
+    main()

backup.py ADDED Viewed

	@@ -0,0 +1,204 @@

+import streamlit as st
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torchvision import transforms
+from torch.utils.data import DataLoader
+from datasets import load_dataset
+from huggingface_hub import HfApi, Repository
+import os
+import matplotlib.pyplot as plt
+import utils
+# Hugging Face Hub credentials
+HF_TOKEN = os.getenv("HF_TOKEN")
+MODEL_REPO_ID = "louiecerv/amer_sign_lang_data_augmentation"
+DATASET_REPO_ID = "louiecerv/american_sign_language"
+# Device configuration
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+st.write(f"Device: {device}")
+# Define the CNN model
class CNN(nn.Module):
    """Small two-stage convolutional classifier for single-channel images.

    Each stage is Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2, stride=2).
    The first fully connected layer expects 64 * 7 * 7 features, which is
    what two 2x2 poolings leave from a 28x28 input.

    Args:
        num_classes: Size of the output layer. Defaults to 25 (letters A-Y),
            the value that used to be hard-coded.
    """

    def __init__(self, num_classes=25):
        super().__init__()
        # Attribute names are kept as-is so existing state_dict checkpoints
        # (conv1.*, conv2.*, fc.*, fc2.*) still load.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(64 * 7 * 7, 128)  # Adjusted for 28x28 images
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return the raw class scores (logits) for the image batch ``x``."""
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = self.flatten(x)
        x = self.relu3(self.fc(x))
        return self.fc2(x)
+# Create a model card
def create_model_card(path="model_repo/README.md"):
    """Write the model card (README with YAML front matter) to ``path``.

    The card text is dedented before writing so that the front matter's
    opening ``---`` is the very first line of the file and the markdown
    headings are not indented into code blocks.

    Args:
        path: Destination file. Defaults to ``model_repo/README.md``, the
            location that used to be hard-coded.
    """
    import textwrap

    # NOTE(review): in the source the text between ``datasets:`` and
    # "Network (CNN) designed ..." was garbled. The dataset entry, the closing
    # ``---``, the title and the start of the intro sentence are reconstructed
    # here -- confirm against the intended card.
    model_card = textwrap.dedent("""\
        ---
        language: en
        tags:
        - image-classification
        - deep-learning
        - cnn
        license: apache-2.0
        datasets:
        - louiecerv/american_sign_language
        ---
        # American Sign Language Recognition Model
        This model is a Convolutional Neural Network (CNN) designed to recognize American Sign Language (ASL) letters from images. It was trained on the `louiecerv/american_sign_language` dataset.
        ## Model Description
        The model consists of two convolutional layers followed by max-pooling layers, a flattening layer, and two fully connected layers. It is designed to classify images of ASL letters into 25 classes (A-Y).
        ## Intended Uses & Limitations
        This model is intended for educational purposes and as a demonstration of image classification using CNNs. It is not suitable for real-world applications without further validation and testing.
        ## How to Use
        ```python
        import torch
        from torchvision import transforms
        from PIL import Image
        # Load the model
        model = CNN()
        model.load_state_dict(torch.load("path_to_model/pytorch_model.bin"))
        model.eval()
        # Preprocess the image
        transform = transforms.Compose([
            transforms.Grayscale(num_output_channels=1),
            transforms.Resize((28, 28)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5], std=[0.5])
        ])
        image = Image.open("path_to_image").convert("RGB")
        image = transform(image).unsqueeze(0)
        # Make a prediction
        with torch.no_grad():
            output = model(image)
            _, predicted = torch.max(output.data, 1)
        print(f"Predicted ASL letter: {predicted.item()}")
        ```
        ## Training Data
        The model was trained on the `louiecerv/american_sign_language` dataset, which contains images of ASL letters.
        ## Training Procedure
        The model was trained using the Adam optimizer with a learning rate of 0.001 and a batch size of 64. The training process included 5 epochs.
        ## Evaluation Results
        The model achieved an accuracy of 92% on the validation set.
        """)
    # Explicit encoding so the card is written identically on every platform.
    with open(path, "w", encoding="utf-8") as f:
        f.write(model_card)
+# Streamlit app
def main():
    """Streamlit page: train the CNN, validate it, and push it to the Hub.

    Loads the dataset named by ``DATASET_REPO_ID``, builds train/validation
    loaders, and, when the "Train Model" button is pressed, trains for the
    number of epochs chosen on the slider, reports validation accuracy, and
    (if ``HF_TOKEN`` is set) saves the weights plus a model card to
    ``MODEL_REPO_ID``.
    """
    st.title("American Sign Language Recognition")

    # Load the dataset from Hugging Face Hub
    dataset = load_dataset(DATASET_REPO_ID)

    # Data loaders with preprocessing:
    transform = transforms.Compose([
        transforms.Normalize(mean=[0.5], std=[0.5])  # Adjust mean and std if needed
    ])

    def collate_fn(batch):
        """Turn a list of dataset items into ``(images, labels)`` tensors on ``device``.

        Items lacking ``pixel_values`` or ``label`` are ignored, and items whose
        transform raises are skipped (best effort). A pair of empty tensors is
        returned when nothing in the batch could be used.
        """
        images = []
        labels = []
        for item in batch:
            if 'pixel_values' in item and 'label' in item:
                image = torch.tensor(item['pixel_values'])  # Convert to tensor
                label = item['label']
                try:
                    image = transform(image)
                    images.append(image)
                    labels.append(label)
                except Exception as e:
                    # Deliberate best effort: one bad image must not abort the batch.
                    print(f"Error processing image: {e}")
                    continue  # Skip to the next image

        if not images:  # Check if the list is empty!
            return torch.tensor([]), torch.tensor([])  # Return empty tensors if no images loaded

        images = torch.stack(images).to(device)
        labels = torch.tensor(labels).long().to(device)
        return images, labels

    train_loader = DataLoader(dataset["train"], batch_size=64, shuffle=True, collate_fn=collate_fn)
    val_loader = DataLoader(dataset["validation"], batch_size=64, collate_fn=collate_fn)

    # Model, loss, and optimizer
    model = CNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training loop
    num_epochs = st.slider("Number of Epochs", 1, 20, 5)  # Streamlit slider
    if st.button("Train Model"):
        # Make the training mode explicit rather than relying on the default.
        model.train()
        for epoch in range(num_epochs):
            for i, (images, labels) in enumerate(train_loader):
                if images.nelement() == 0:  # Check if images tensor is empty
                    continue

                # Forward pass
                outputs = model(images)
                loss = criterion(outputs, labels)

                # Backward and optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if (i + 1) % 100 == 0:
                    st.write(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}')

        # Validation
        # Evaluation mode keeps the measurement correct if the model ever
        # gains dropout or batch-norm layers.
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                if images.nelement() == 0:  # Check if images tensor is empty
                    continue
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        if total > 0:
            accuracy = 100 * correct / total
            st.write(f'Accuracy of the model on the validation images: {accuracy:.2f}%')
        else:
            st.write("No validation images were processed.")

        # Save model to Hugging Face Hub
        if HF_TOKEN:
            repo = Repository(local_dir="model_repo", clone_from=MODEL_REPO_ID, use_auth_token=HF_TOKEN)
            model_path = os.path.join(repo.local_dir, "pytorch_model.bin")
            torch.save(model.state_dict(), model_path)
            create_model_card()
            repo.push_to_hub(commit_message="Trained model and model card", blocking=True)
            st.write(f"Model and model card saved to {MODEL_REPO_ID}")
        else:
            st.warning("HF_TOKEN environment variable not set. Model not saved.")
+if __name__ == "__main__":
+    main()

model_repo ADDED Viewed

	@@ -0,0 +1 @@


1	+ Subproject commit 3b1e03d8d415269d86b88c9df83295a9ef454bb5

requirements.txt CHANGED Viewed

@@ -5,5 +5,6 @@ huggingface_hub
 torch
 torchvision
 pandas
-Pillow  # or PIL (Pillow is the actively maintained fork)
-scikit-learn

 torch
 torchvision
 pandas
+Pillow
+scikit-learn
+matplotlib

utils.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import torch
+import torch.nn as nn
class MyConvBlock(nn.Module):
    """Conv2d -> BatchNorm2d -> ReLU -> Dropout -> MaxPool2d(2, stride=2).

    The convolution uses stride 1 with ``kernel_size // 2`` padding, so it
    keeps the spatial size; the trailing max-pool then halves height and
    width (rounding down).

    Args:
        in_ch: Number of input channels.
        out_ch: Number of channels produced by the convolution.
        dropout_p: Dropout probability applied after the ReLU.
        kernel_size: Side of the square convolution kernel. Must be a positive
            odd integer so the padding preserves the spatial size. Defaults to
            3, the value that used to be hard-coded.

    Raises:
        ValueError: If ``kernel_size`` is not a positive odd integer.
    """

    def __init__(self, in_ch, out_ch, dropout_p, kernel_size=3):
        if kernel_size < 1 or kernel_size % 2 == 0:
            raise ValueError(
                f"kernel_size must be a positive odd integer, got {kernel_size}"
            )
        super().__init__()
        # The attribute name ``model`` and the layer order are unchanged so
        # state_dict keys (model.0.weight, model.1.*, ...) stay compatible.
        self.model = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size, stride=1, padding=kernel_size // 2),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
            nn.Dropout(dropout_p),
            nn.MaxPool2d(2, stride=2),
        )

    def forward(self, x):
        """Run ``x`` through the block and return the pooled feature maps."""
        return self.model(x)
def get_batch_accuracy(output, y, N):
    """Return the number of correct predictions in a batch divided by ``N``.

    The predicted class for each row of ``output`` is its arg-max along
    dim 1. ``train`` and ``validate`` add these per-batch values together,
    so ``N`` is presumably the size of the whole dataset rather than of the
    batch -- verify against the caller.
    """
    predicted = output.argmax(dim=1, keepdim=True)
    targets = y.view_as(predicted)  # align label shape with the predictions
    n_correct = predicted.eq(targets).sum().item()
    return n_correct / N
def train(model, train_loader, train_N, random_trans, optimizer, loss_function):
    """Run one training pass over ``train_loader`` and report the metrics.

    Args:
        model: Module to optimise; switched to training mode first.
        train_loader: Iterable yielding ``(x, y)`` batches.
        train_N: Divisor handed to ``get_batch_accuracy`` for every batch
            (presumably the number of training samples -- verify at the
            call site).
        random_trans: Callable applied to each ``x`` before the forward pass.
        optimizer: Optimizer updating ``model``'s parameters.
        loss_function: Callable ``(output, y) -> loss tensor``.

    Returns:
        tuple: ``(loss, accuracy)`` -- the sum of the per-batch loss values
        and the sum of the per-batch accuracy contributions. Previously these
        were only printed; returning them lets callers record or plot them.
    """
    loss = 0.0
    accuracy = 0.0

    model.train()
    for x, y in train_loader:
        # Clear stale gradients before computing this batch's gradients.
        optimizer.zero_grad()
        output = model(random_trans(x))
        batch_loss = loss_function(output, y)
        batch_loss.backward()
        optimizer.step()

        loss += batch_loss.item()
        accuracy += get_batch_accuracy(output, y, train_N)
    print('Train - Loss: {:.4f} Accuracy: {:.4f}'.format(loss, accuracy))
    return loss, accuracy
def validate(model, valid_loader, valid_N, loss_function):
    """Evaluate ``model`` on ``valid_loader`` without updating it.

    Args:
        model: Module to evaluate; switched to evaluation mode first.
        valid_loader: Iterable yielding ``(x, y)`` batches.
        valid_N: Divisor handed to ``get_batch_accuracy`` for every batch
            (presumably the number of validation samples -- verify at the
            call site).
        loss_function: Callable ``(output, y) -> loss tensor``.

    Returns:
        tuple: ``(loss, accuracy)`` -- the sum of the per-batch loss values
        and the sum of the per-batch accuracy contributions. Previously these
        were only printed; returning them lets callers record or plot them.
    """
    loss = 0.0
    accuracy = 0.0

    model.eval()
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for x, y in valid_loader:
            output = model(x)

            loss += loss_function(output, y).item()
            accuracy += get_batch_accuracy(output, y, valid_N)
    print('Valid - Loss: {:.4f} Accuracy: {:.4f}'.format(loss, accuracy))
    return loss, accuracy