import streamlit as st
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from datasets import load_dataset
from huggingface_hub import HfApi, Repository
import os
import matplotlib.pyplot as plt
import utils
# Hugging Face Hub credentials
HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_REPO_ID = "louiecerv/amer_sign_lang_data_augmentation"
DATASET_REPO_ID = "louiecerv/american_sign_language"
# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
st.write(f"Device: {device}")
# Define the CNN model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(64 * 7 * 7, 128)  # 28x28 input pooled twice -> 7x7 feature maps
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, 25)  # 25 output classes (letters A-Y; J and Z involve motion and are not in the data)

    def forward(self, x):
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = self.flatten(x)
        x = self.relu3(self.fc(x))
        x = self.fc2(x)
        return x
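
def _shape_sanity_check():
    """Minimal sketch (not wired into the app): check the layer arithmetic.

    A 1x28x28 input is halved by each of the two pooling layers
    (28 -> 14 -> 7), so the flattened feature size is 64 * 7 * 7 = 3136,
    matching the in_features of self.fc above.
    """
    model = CNN()
    dummy = torch.zeros(1, 1, 28, 28)  # (batch, channels, height, width)
    assert model(dummy).shape == (1, 25)
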
# Create a model card
def create_model_card():
    model_card = """
---
language: en
tags:
- image-classification
- deep-learning
- cnn
license: apache-2.0
datasets:
- louiecerv/american_sign_language
---

# American Sign Language Recognition CNN

This model is a Convolutional Neural Network (CNN) designed to recognize American Sign Language (ASL) letters from images. It was trained on the `louiecerv/american_sign_language` dataset.

## Model Description
The model consists of two convolutional layers followed by max-pooling layers, a flattening layer, and two fully connected layers. It is designed to classify images of ASL letters into 25 classes (A-Y).
## Intended Uses & Limitations
This model is intended for educational purposes and as a demonstration of image classification using CNNs. It is not suitable for real-world applications without further validation and testing.
## How to Use
```python
import torch
from torchvision import transforms
from PIL import Image
# Load the model
model = CNN()
model.load_state_dict(torch.load("path_to_model/pytorch_model.bin", map_location="cpu"))
model.eval()
# Preprocess the image
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])
image = Image.open("path_to_image").convert("RGB")
image = transform(image).unsqueeze(0)
# Make a prediction
with torch.no_grad():
    output = model(image)
    _, predicted = torch.max(output, 1)
print(f"Predicted class index: {predicted.item()}")
```
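
Note that `CNN` must already be defined (or imported) exactly as in this repository's training script before the state dict can be loaded; the checkpoint stores only the weights.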
## Training Data
The model was trained on the `louiecerv/american_sign_language` dataset, which contains images of ASL letters.
## Training Procedure
The model was trained using the Adam optimizer with a learning rate of 0.001 and a batch size of 64. The training process included 5 epochs.
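
For reference, a minimal sketch of the corresponding setup (reusing the `model` instance from the snippet above):

```python
import torch.nn as nn
import torch.optim as optim

criterion = nn.CrossEntropyLoss()   # expects raw logits
optimizer = optim.Adam(model.parameters(), lr=0.001)
```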
## Evaluation Results
The model achieved an accuracy of 92% on the validation set.
"""
    with open("model_repo/README.md", "w") as f:
        f.write(model_card)
# Streamlit app
def main():
    st.title("American Sign Language Recognition")

    # Load the dataset from the Hugging Face Hub
    dataset = load_dataset(DATASET_REPO_ID)

    # Data loaders with preprocessing
    transform = transforms.Compose([
        transforms.Normalize(mean=[0.5], std=[0.5])  # Adjust mean and std if needed
    ])
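    # Assumption: 'pixel_values' are already floats in [0, 1], so this Normalize
    # maps them to roughly [-1, 1]. If the raw values are 0-255, scale by 1/255
    # in collate_fn first.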

    def collate_fn(batch):
        images = []
        labels = []
        for item in batch:
            if 'pixel_values' in item and 'label' in item:
                image = torch.tensor(item['pixel_values'], dtype=torch.float32)  # Convert to a float tensor
                label = item['label']
                try:
                    image = transform(image)
                    images.append(image)
                    labels.append(label)
                except Exception as e:
                    print(f"Error processing image: {e}")
                    continue  # Skip to the next image
        if not images:  # Check if the list is empty!
            return torch.tensor([]), torch.tensor([])  # Return empty tensors if no images loaded
        images = torch.stack(images).to(device)
        labels = torch.tensor(labels).long().to(device)
        return images, labels
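    # Assumed schema: each item carries 'pixel_values' shaped (1, 28, 28) plus an
    # integer 'label'; malformed items are skipped so one bad record cannot abort
    # an entire batch.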

    train_loader = DataLoader(dataset["train"], batch_size=64, shuffle=True, collate_fn=collate_fn)
    val_loader = DataLoader(dataset["validation"], batch_size=64, collate_fn=collate_fn)

    # Model, loss, and optimizer
    model = CNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
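    # CrossEntropyLoss applies log-softmax internally, so CNN.forward correctly
    # returns raw logits from fc2 with no softmax layer at the end.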

    # Training loop
    num_epochs = st.slider("Number of Epochs", 1, 20, 5)  # Streamlit slider
    if st.button("Train Model"):
        for epoch in range(num_epochs):
            for i, (images, labels) in enumerate(train_loader):
                if images.nelement() == 0:  # Check if images tensor is empty
                    continue

                # Forward pass
                outputs = model(images)
                loss = criterion(outputs, labels)

                # Backward and optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if (i + 1) % 100 == 0:
                    st.write(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}')
        # Validation
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                if images.nelement() == 0:  # Check if images tensor is empty
                    continue
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        if total > 0:
            accuracy = 100 * correct / total
            st.write(f'Accuracy of the model on the validation images: {accuracy:.2f}%')
        else:
            st.write("No validation images were processed.")
        # Save model to Hugging Face Hub
        if HF_TOKEN:
            repo = Repository(local_dir="model_repo", clone_from=MODEL_REPO_ID, use_auth_token=HF_TOKEN)
            model_path = os.path.join(repo.local_dir, "pytorch_model.bin")
            torch.save(model.state_dict(), model_path)
            create_model_card()
            repo.push_to_hub(commit_message="Trained model and model card", blocking=True)
            st.write(f"Model and model card saved to {MODEL_REPO_ID}")
        else:
            st.warning("HF_TOKEN environment variable not set. Model not saved.")
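        # Note: Repository is deprecated in newer huggingface_hub releases; if
        # this breaks after an upgrade, HfApi.upload_file/upload_folder is the
        # suggested replacement.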

if __name__ == "__main__":
    main()