# Source header (Hugging Face file-view residue): commit e8bade5 by amanmibra — "Add gradio app"
import sys
sys.path.append('..')
import time
# torch
import torch
import torchaudio
from torch import nn
from torch.utils.data import DataLoader
# modal
from modal import Mount, Secret, Stub, gpu, create_package_mounts
# internal
from pipelines.images import training_image_pip
# model
from dataset import VoiceDataset
from cnn import CNNetwork
# script defaults
BATCH_SIZE = 128  # samples per dataloader batch
EPOCHS = 100  # training passes; also scales the remote function timeout below
LEARNING_RATE = 0.001  # Adam learning rate
TRAIN_FILE="data/aisf/augmented/train"  # training split directory
TEST_FILE="data/aisf/augmented/test"  # test split directory
SAMPLE_RATE=48000  # audio sample rate (Hz) fed to the mel spectrogram
# Modal app; the remote functions below attach to this stub and run inside
# the `training_image_pip` container image.
stub = Stub(
    "void-training",
    image=training_image_pip,
)
@stub.function(
    gpu=gpu.A100(memory=20),
    mounts=[
        Mount.from_local_file(local_path='dataset.py'),
        Mount.from_local_file(local_path='cnn.py'),
    ],
    timeout=EPOCHS * 200,
    secret=Secret.from_name("wandb"),
)
def train(
    model,
    train_dataloader,
    loss_fn,
    optimizer,
    origin_device="cuda",
    epochs=10,
    test_dataloader=None,
    wandb_enabled=False,
):
    """Run the full training loop on a remote Modal GPU.

    Args:
        model: network to train; moved to CUDA inside the container when available.
        train_dataloader: batches of (waveform, target) for training.
        loss_fn: criterion comparing model output to targets.
        optimizer: optimizer updating `model`'s parameters.
        origin_device: device to move the trained model back to before returning.
        epochs: number of passes over the training data.
        test_dataloader: optional loader; when given, validation runs each epoch.
        wandb_enabled: when True, init/log/finish a Weights & Biases run.

    Returns:
        The trained model, moved back to `origin_device`.
    """
    import time

    import torch
    import wandb

    print("Begin model training...")
    begin = time.time()

    # inside the remote container, prefer CUDA regardless of the caller's device
    modal_device = origin_device
    if torch.cuda.is_available() and modal_device != "cuda":
        modal_device = "cuda"
    model = model.to(modal_device)

    # per-epoch metrics, averaged over batches
    training_acc = []
    training_loss = []
    testing_acc = []
    testing_loss = []

    if wandb_enabled:
        wandb.init(project="void-training")

    for i in range(epochs):
        print(f"Epoch {i + 1}/{epochs}")
        then = time.time()

        # train model for one epoch (remote call returns accumulated sums)
        model, train_epoch_loss, train_epoch_acc = train_epoch.call(model, train_dataloader, loss_fn, optimizer, modal_device)

        # training metrics: convert batch sums to per-epoch averages
        training_loss.append(train_epoch_loss/len(train_dataloader))
        training_acc.append(train_epoch_acc/len(train_dataloader))

        if wandb_enabled:
            wandb.log({'training_loss': training_loss[i], 'training_acc': training_acc[i]})

        now = time.time()
        print("Training Loss: {:.2f}, Training Accuracy: {:.4f}, Time: {:.2f}s".format(training_loss[i], training_acc[i], now - then))

        if test_dataloader:
            # evaluate on the held-out split
            test_epoch_loss, test_epoch_acc = validate_epoch.call(model, test_dataloader, loss_fn, modal_device)

            # testing metrics
            testing_loss.append(test_epoch_loss/len(test_dataloader))
            testing_acc.append(test_epoch_acc/len(test_dataloader))
            print("Testing Loss: {:.2f}, Testing Accuracy {:.4f}".format(testing_loss[i], testing_acc[i]))

            if wandb_enabled:
                wandb.log({'testing_loss': testing_loss[i], 'testing_acc': testing_acc[i]})

        print ("-------------------------------------------------------- \n")

    end = time.time()

    # BUG FIX: only finish a wandb run when one was actually started;
    # the original called wandb.finish() unconditionally.
    if wandb_enabled:
        wandb.finish()

    print("-------- Finished Training --------")
    print("-------- Total Time -- {:.2f}s --------".format(end - begin))

    return model.to(origin_device)
@stub.function(
    gpu=gpu.A100(memory=20),
    mounts=[
        Mount.from_local_file(local_path='dataset.py'),
        Mount.from_local_file(local_path='cnn.py'),
    ],
    timeout=600,
)
def train_epoch(model, train_dataloader, loss_fn, optimizer, device):
    """Run one training pass over `train_dataloader` on `device`.

    Returns:
        (model, summed batch loss, summed per-batch accuracy). The caller
        divides the sums by len(train_dataloader) to obtain epoch averages.
    """
    import torch
    from tqdm import tqdm

    train_loss = 0.0
    train_acc = 0.0

    model.train()
    for wav, target in tqdm(train_dataloader):
        wav, target = wav.to(device), target.to(device)

        # forward pass and loss
        output = model(wav)
        loss = loss_fn(output, target)

        # backprop and update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # accumulate metrics (accuracy is averaged within the batch)
        train_loss += loss.item()
        prediction = torch.argmax(output, 1)
        train_acc += (prediction == target).sum().item()/len(prediction)

    return model, train_loss, train_acc
@stub.function(
    gpu=gpu.A100(memory=20),
    mounts=[
        Mount.from_local_file(local_path='dataset.py'),
        Mount.from_local_file(local_path='cnn.py'),
    ],
)
def validate_epoch(model, test_dataloader, loss_fn, device):
    """Evaluate `model` over `test_dataloader` without gradient tracking.

    Returns:
        (summed batch loss, summed per-batch accuracy). The caller divides
        the sums by len(test_dataloader) to obtain epoch averages.
    """
    # import torch locally for consistency with train_epoch, so the remote
    # function does not depend on the module-level import.
    import torch
    from tqdm import tqdm

    test_loss = 0.0
    test_acc = 0.0

    model.eval()
    with torch.no_grad():
        for wav, target in tqdm(test_dataloader, "Testing batch..."):
            wav, target = wav.to(device), target.to(device)

            output = model(wav)
            loss = loss_fn(output, target)

            # accumulate metrics (typo fix: was `prediciton`)
            test_loss += loss.item()
            prediction = torch.argmax(output, 1)
            test_acc += (prediction == target).sum().item()/len(prediction)

    return test_loss, test_acc
def save_model(model):
    """Persist `model`'s state dict to a timestamped file under models/.

    Args:
        model: a torch module whose state_dict() will be saved.

    Returns:
        The path the checkpoint was written to.
    """
    import os

    # BUG FIX: torch.save raises if the target directory does not exist
    os.makedirs("models", exist_ok=True)

    now = time.strftime("%Y%m%d_%H%M%S")
    model_filename = f"models/void_{now}.pth"
    torch.save(model.state_dict(), model_filename)

    print(f"Trained void model saved at {model_filename}")
    return model_filename
def get_device():
    """Return "cuda" when a GPU is available, otherwise "cpu"."""
    return "cuda" if torch.cuda.is_available() else "cpu"
@stub.local_entrypoint()
def main():
    """Build datasets, model, and optimizer locally, then launch remote training."""
    print("Initiating model training...")

    device = get_device()

    # shared mel-spectrogram transform for both splits
    mel_spectrogram = torchaudio.transforms.MelSpectrogram(
        sample_rate=SAMPLE_RATE,
        n_fft=2048,
        hop_length=512,
        n_mels=128,
    )

    # datasets wrapped directly into their loaders
    train_dataloader = DataLoader(
        VoiceDataset(TRAIN_FILE, mel_spectrogram, device, time_limit_in_secs=3),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )
    test_dataloader = DataLoader(
        VoiceDataset(TEST_FILE, mel_spectrogram, device, time_limit_in_secs=3),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )

    # model, criterion, and optimizer
    model = CNNetwork()
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # run training remotely, then persist the returned model
    trained_model = train.call(model, train_dataloader, loss_fn, optimizer, device, EPOCHS, test_dataloader, True)
    save_model(trained_model)