{ "audio_fr": 16000, "causal_attn": true, "class_dropout_prob": 0.1, "depth": 12, "grad_ckpt": false, "hidden_size": 768, "in_channels": 4, "latent_size": 32, "mlp_ratio": 4.0, "n_mels": 256, "num_classes": 0, "num_heads": 12, "patch_size": 1, "predict_frames": 10 }