ckadirt committed on Mar 30, 2024

Commit

65ffd92

verified ·

1 Parent(s): 6edc6bc

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +11 -0
cache/models--facebook--musicgen-small/blobs/1bdc99d43eb6c775967df24b65b0a9f847c0907e95664698d93b5a1c35f5090d +3 -0
cache/models--facebook--musicgen-small/blobs/45e996eaadd56e1cdaab46cb5e97d295541d40a3 +10 -0
cache/models--facebook--musicgen-small/blobs/9664ce3a7ca28d1084f10413971b54481198589a +298 -0
cache/models--facebook--musicgen-small/refs/main +1 -0
cache/models--facebook--musicgen-small/snapshots/51027f0bee8489c1750a7b8a4806894ab2e7dc4d/config.json +298 -0
cache/models--facebook--musicgen-small/snapshots/51027f0bee8489c1750a7b8a4806894ab2e7dc4d/generation_config.json +10 -0
cache/models--facebook--musicgen-small/snapshots/51027f0bee8489c1750a7b8a4806894ab2e7dc4d/model.safetensors +3 -0
data/encodec32khz_testing_embeds_sorted.npy +3 -0
data/encodec32khz_training_embeds_sorted.npy +3 -0
data/encodec_test_embeds.npy +3 -0
data/encodec_testing_embeds_sorted.npy +3 -0
data/encodec_training_embeds.npy +3 -0
data/encodec_training_embeds_sorted.npy +3 -0
data/sub-001_Resp_Test.npy +3 -0
data/sub-001_Resp_Test_Mean.npy +3 -0
data/sub-001_Resp_Training.npy +3 -0
src/.ipynb_checkpoints/Copy_of_MusicGen-checkpoint.ipynb +0 -0
src/.ipynb_checkpoints/MLP-model copy-checkpoint.ipynb +582 -0
src/.ipynb_checkpoints/MLP-model-checkpoint.ipynb +449 -0
src/.ipynb_checkpoints/MLPencoder-checkpoint.ipynb +0 -0
src/.ipynb_checkpoints/mlpdummy-checkpoint.py +146 -0
src/.ipynb_checkpoints/musicgen_test copy-checkpoint.ipynb +0 -0
src/Copy_of_MusicGen.ipynb +0 -0
src/MLP-model copy.ipynb +581 -0
src/MLP-model.ipynb +448 -0
src/MLPencoder.ipynb +0 -0
src/b2m-ckpt1 +0 -0
src/b2m-ckpt1.pt +3 -0
src/mlpdummy.py +146 -0
src/musicgen_test copy.ipynb +0 -0
src/musicgen_test.ipynb +0 -0
src/outputs_train0.pt +3 -0
src/outputs_train1.pt +3 -0
src/outputs_train10.pt +3 -0
src/outputs_train11.pt +3 -0
src/outputs_train12.pt +3 -0
src/outputs_train13.pt +3 -0
src/outputs_train14.pt +3 -0
src/outputs_train15.pt +3 -0
src/outputs_train16.pt +3 -0
src/outputs_train17.pt +3 -0
src/outputs_train18.pt +3 -0
src/outputs_train19.pt +3 -0
src/outputs_train2.pt +3 -0
src/outputs_train20.pt +3 -0
src/outputs_train21.pt +3 -0
src/outputs_train22.pt +3 -0
src/outputs_train23.pt +3 -0
src/outputs_train24.pt +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+cache/models--facebook--musicgen-small/blobs/1bdc99d43eb6c775967df24b65b0a9f847c0907e95664698d93b5a1c35f5090d filter=lfs diff=lfs merge=lfs -text
+src/playground.ipynb filter=lfs diff=lfs merge=lfs -text
+src/wandb/latest-run/run-jggbeix7.wandb filter=lfs diff=lfs merge=lfs -text
+src/wandb/run-20230831_163543-sqaecwr8/run-sqaecwr8.wandb filter=lfs diff=lfs merge=lfs -text
+src/wandb/run-20230831_165224-f6iksuh9/run-f6iksuh9.wandb filter=lfs diff=lfs merge=lfs -text
+src/wandb/run-20230831_200743-4bm6v8ps/run-4bm6v8ps.wandb filter=lfs diff=lfs merge=lfs -text
+src/wandb/run-20230831_203013-sdb63g4i/run-sdb63g4i.wandb filter=lfs diff=lfs merge=lfs -text
+src/wandb/run-20230831_220330-mf53e4vk/run-mf53e4vk.wandb filter=lfs diff=lfs merge=lfs -text
+src/wandb/run-20230901_003519-kxdc1ebl/run-kxdc1ebl.wandb filter=lfs diff=lfs merge=lfs -text
+src/wandb/run-20230901_011334-pej3kex1/run-pej3kex1.wandb filter=lfs diff=lfs merge=lfs -text
+src/wandb/run-20230908_042527-jggbeix7/run-jggbeix7.wandb filter=lfs diff=lfs merge=lfs -text

cache/models--facebook--musicgen-small/blobs/1bdc99d43eb6c775967df24b65b0a9f847c0907e95664698d93b5a1c35f5090d ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1bdc99d43eb6c775967df24b65b0a9f847c0907e95664698d93b5a1c35f5090d
+size 2364427288

cache/models--facebook--musicgen-small/blobs/45e996eaadd56e1cdaab46cb5e97d295541d40a3 ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 2048,
+  "decoder_start_token_id": 2048,
+  "do_sample": true,
+  "guidance_scale": 3.0,
+  "max_length": 1500,
+  "pad_token_id": 2048,
+  "transformers_version": "4.31.0.dev0"
+}

cache/models--facebook--musicgen-small/blobs/9664ce3a7ca28d1084f10413971b54481198589a ADDED Viewed

	@@ -0,0 +1,298 @@

+{
+  "_commit_hash": null,
+  "architectures": [
+    "MusicgenForConditionalGeneration"
+  ],
+  "audio_encoder": {
+    "_name_or_path": "facebook/encodec_32khz",
+    "add_cross_attention": false,
+    "architectures": [
+      "EncodecModel"
+    ],
+    "audio_channels": 1,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_length_s": null,
+    "chunk_size_feed_forward": 0,
+    "codebook_dim": 128,
+    "codebook_size": 2048,
+    "compress": 2,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "dilation_growth_rate": 2,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_size": 128,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "kernel_size": 7,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "last_kernel_size": 7,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "encodec",
+    "no_repeat_ngram_size": 0,
+    "norm_type": "weight_norm",
+    "normalize": false,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_filters": 64,
+    "num_lstm_layers": 2,
+    "num_residual_layers": 1,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "overlap": null,
+    "pad_mode": "reflect",
+    "pad_token_id": null,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "residual_kernel_size": 3,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sampling_rate": 32000,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "target_bandwidths": [
+      2.2
+    ],
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": "float32",
+    "torchscript": false,
+    "transformers_version": "4.31.0.dev0",
+    "trim_right_ratio": 1.0,
+    "typical_p": 1.0,
+    "upsampling_ratios": [
+      8,
+      5,
+      4,
+      4
+    ],
+    "use_bfloat16": false,
+    "use_causal_conv": false,
+    "use_conv_shortcut": false
+  },
+  "decoder": {
+    "_name_or_path": "",
+    "activation_dropout": 0.0,
+    "activation_function": "gelu",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": 2048,
+    "chunk_size_feed_forward": 0,
+    "classifier_dropout": 0.0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.1,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "ffn_dim": 4096,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_size": 1024,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_factor": 0.02,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layerdrop": 0.0,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 2048,
+    "min_length": 0,
+    "model_type": "musicgen_decoder",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 16,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_codebooks": 4,
+    "num_hidden_layers": 24,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": 2048,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "scale_embedding": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": false,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.31.0.dev0",
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "use_cache": true,
+    "vocab_size": 2048
+  },
+  "is_encoder_decoder": true,
+  "model_type": "musicgen",
+  "text_encoder": {
+    "_name_or_path": "t5-base",
+    "add_cross_attention": false,
+    "architectures": [
+      "T5ForConditionalGeneration"
+    ],
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "d_ff": 3072,
+    "d_kv": 64,
+    "d_model": 768,
+    "decoder_start_token_id": 0,
+    "dense_act_fn": "relu",
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout_rate": 0.1,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 1,
+    "exponential_decay_length_penalty": null,
+    "feed_forward_proj": "relu",
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_factor": 1.0,
+    "is_decoder": false,
+    "is_encoder_decoder": true,
+    "is_gated_act": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_epsilon": 1e-06,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "t5",
+    "n_positions": 512,
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_decoder_layers": 12,
+    "num_heads": 12,
+    "num_layers": 12,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_past": true,
+    "output_scores": false,
+    "pad_token_id": 0,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "relative_attention_max_distance": 128,
+    "relative_attention_num_buckets": 32,
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": {
+      "summarization": {
+        "early_stopping": true,
+        "length_penalty": 2.0,
+        "max_length": 200,
+        "min_length": 30,
+        "no_repeat_ngram_size": 3,
+        "num_beams": 4,
+        "prefix": "summarize: "
+      },
+      "translation_en_to_de": {
+        "early_stopping": true,
+        "max_length": 300,
+        "num_beams": 4,
+        "prefix": "translate English to German: "
+      },
+      "translation_en_to_fr": {
+        "early_stopping": true,
+        "max_length": 300,
+        "num_beams": 4,
+        "prefix": "translate English to French: "
+      },
+      "translation_en_to_ro": {
+        "early_stopping": true,
+        "max_length": 300,
+        "num_beams": 4,
+        "prefix": "translate English to Romanian: "
+      }
+    },
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.31.0.dev0",
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "use_cache": true,
+    "vocab_size": 32128
+  },
+  "torch_dtype": "float32",
+  "transformers_version": null
+}

cache/models--facebook--musicgen-small/refs/main ADDED Viewed

	@@ -0,0 +1 @@


1	+ 51027f0bee8489c1750a7b8a4806894ab2e7dc4d

cache/models--facebook--musicgen-small/snapshots/51027f0bee8489c1750a7b8a4806894ab2e7dc4d/config.json ADDED Viewed

	@@ -0,0 +1,298 @@

+{
+  "_commit_hash": null,
+  "architectures": [
+    "MusicgenForConditionalGeneration"
+  ],
+  "audio_encoder": {
+    "_name_or_path": "facebook/encodec_32khz",
+    "add_cross_attention": false,
+    "architectures": [
+      "EncodecModel"
+    ],
+    "audio_channels": 1,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_length_s": null,
+    "chunk_size_feed_forward": 0,
+    "codebook_dim": 128,
+    "codebook_size": 2048,
+    "compress": 2,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "dilation_growth_rate": 2,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_size": 128,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "kernel_size": 7,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "last_kernel_size": 7,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "encodec",
+    "no_repeat_ngram_size": 0,
+    "norm_type": "weight_norm",
+    "normalize": false,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_filters": 64,
+    "num_lstm_layers": 2,
+    "num_residual_layers": 1,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "overlap": null,
+    "pad_mode": "reflect",
+    "pad_token_id": null,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "residual_kernel_size": 3,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sampling_rate": 32000,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "target_bandwidths": [
+      2.2
+    ],
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": "float32",
+    "torchscript": false,
+    "transformers_version": "4.31.0.dev0",
+    "trim_right_ratio": 1.0,
+    "typical_p": 1.0,
+    "upsampling_ratios": [
+      8,
+      5,
+      4,
+      4
+    ],
+    "use_bfloat16": false,
+    "use_causal_conv": false,
+    "use_conv_shortcut": false
+  },
+  "decoder": {
+    "_name_or_path": "",
+    "activation_dropout": 0.0,
+    "activation_function": "gelu",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": 2048,
+    "chunk_size_feed_forward": 0,
+    "classifier_dropout": 0.0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.1,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "ffn_dim": 4096,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_size": 1024,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_factor": 0.02,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layerdrop": 0.0,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 2048,
+    "min_length": 0,
+    "model_type": "musicgen_decoder",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 16,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_codebooks": 4,
+    "num_hidden_layers": 24,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": 2048,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "scale_embedding": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": false,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.31.0.dev0",
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "use_cache": true,
+    "vocab_size": 2048
+  },
+  "is_encoder_decoder": true,
+  "model_type": "musicgen",
+  "text_encoder": {
+    "_name_or_path": "t5-base",
+    "add_cross_attention": false,
+    "architectures": [
+      "T5ForConditionalGeneration"
+    ],
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "d_ff": 3072,
+    "d_kv": 64,
+    "d_model": 768,
+    "decoder_start_token_id": 0,
+    "dense_act_fn": "relu",
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout_rate": 0.1,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 1,
+    "exponential_decay_length_penalty": null,
+    "feed_forward_proj": "relu",
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_factor": 1.0,
+    "is_decoder": false,
+    "is_encoder_decoder": true,
+    "is_gated_act": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_epsilon": 1e-06,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "t5",
+    "n_positions": 512,
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_decoder_layers": 12,
+    "num_heads": 12,
+    "num_layers": 12,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_past": true,
+    "output_scores": false,
+    "pad_token_id": 0,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "relative_attention_max_distance": 128,
+    "relative_attention_num_buckets": 32,
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": {
+      "summarization": {
+        "early_stopping": true,
+        "length_penalty": 2.0,
+        "max_length": 200,
+        "min_length": 30,
+        "no_repeat_ngram_size": 3,
+        "num_beams": 4,
+        "prefix": "summarize: "
+      },
+      "translation_en_to_de": {
+        "early_stopping": true,
+        "max_length": 300,
+        "num_beams": 4,
+        "prefix": "translate English to German: "
+      },
+      "translation_en_to_fr": {
+        "early_stopping": true,
+        "max_length": 300,
+        "num_beams": 4,
+        "prefix": "translate English to French: "
+      },
+      "translation_en_to_ro": {
+        "early_stopping": true,
+        "max_length": 300,
+        "num_beams": 4,
+        "prefix": "translate English to Romanian: "
+      }
+    },
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.31.0.dev0",
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "use_cache": true,
+    "vocab_size": 32128
+  },
+  "torch_dtype": "float32",
+  "transformers_version": null
+}

cache/models--facebook--musicgen-small/snapshots/51027f0bee8489c1750a7b8a4806894ab2e7dc4d/generation_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 2048,
+  "decoder_start_token_id": 2048,
+  "do_sample": true,
+  "guidance_scale": 3.0,
+  "max_length": 1500,
+  "pad_token_id": 2048,
+  "transformers_version": "4.31.0.dev0"
+}

cache/models--facebook--musicgen-small/snapshots/51027f0bee8489c1750a7b8a4806894ab2e7dc4d/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1bdc99d43eb6c775967df24b65b0a9f847c0907e95664698d93b5a1c35f5090d
+size 2364427288

data/encodec32khz_testing_embeds_sorted.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:365ada4922d3328895b978df9127c44a35a807732157839ae862c23acb174719
+size 2880128

data/encodec32khz_training_embeds_sorted.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b26b9d0e279143f1cad0c7e3b3879afb0c10fc2ba774c1f35521ac04efde67f4
+size 5760128

data/encodec_test_embeds.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ba64ae0cc073640c9104ee24814b480bab5fedbefc3e1fc82f7435243090875
+size 2160128

data/encodec_testing_embeds_sorted.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:691527ea72cbec7b67821d15ce2a179648d25b08b488afec7bcc3580fee2b4d3
+size 2160128

data/encodec_training_embeds.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:733ea631bafb622c61b325435f3beaa6d92d8cbe1756ea2eab40ce1ca9f14170
+size 4320128

data/encodec_training_embeds_sorted.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2da1180e4324c6fa148c531160aa179c8f00dd677de076555bf632ccc5f9e09c
+size 4320128

data/sub-001_Resp_Test.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1f38bf483013229e1e2d56111fd041743e0948f649aa041ddb0aa0bfbfe0f7e3
+size 583526528

data/sub-001_Resp_Test_Mean.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:abc68b4d0b8974b6d3a720d463beb52ee464860a3d324c1c8e696109248a76c7
+size 145881728

data/sub-001_Resp_Training.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66aaaf44e962c1f3121e7f06b8a07fc9140d10be48cc71e0e3d7a1feaa6b130b
+size 1167052928

src/.ipynb_checkpoints/Copy_of_MusicGen-checkpoint.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

src/.ipynb_checkpoints/MLP-model copy-checkpoint.ipynb ADDED Viewed

	@@ -0,0 +1,582 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ckadirt/miniconda3/envs/b2m/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os, torch, torch.nn as nn, torch.utils.data as data, torchvision as tv\n",
+    "import lightning as L\n",
+    "import numpy as np, pandas as pd, matplotlib.pyplot as plt\n",
+    "from pytorch_lightning.loggers import WandbLogger"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create the datasets and dataloaders\n",
+    "train_voxels_path = '/home/ckadirt/brain2music/dataset/preproc/sub-001_Resp_Training.npy' # path to training voxels 65000 * 4800 \n",
+    "test_voxels_path = '/home/ckadirt/brain2music/dataset/preproc/sub-001_Resp_Test_Mean.npy' # path to test voxels 65000 * 600\n",
+    "\n",
+    "train_embeddings_path = '/home/ckadirt/brain2music/encodec_training_embeds_150.npy' # path to training embeddings 480 * 2 * 1125\n",
+    "test_embeddings_path = '/home/ckadirt/brain2music/encodec_test_embeds_150.npy' # path to test embeddings 600 * 2 * 1125\n",
+    "\n",
+    "class VoxelsDataset(data.Dataset):\n",
+    "    def __init__(self, voxels_path, embeddings_path):\n",
+    "        # transpose the two dimensions of the voxels data to match the embeddings data\n",
+    "        self.voxels = torch.from_numpy(np.load(voxels_path)).float().transpose(0, 1)\n",
+    "        self.embeddings = torch.from_numpy(np.load(embeddings_path))\n",
+    "        # as each stimulus has been exposed for 15 seconds and the fMRI data is sampled every 1.5 seconds, we take 10 samples per stimulus\n",
+    "        self.len = len(self.voxels) // 10\n",
+    "        print(\"The len is \", self.len  )\n",
+    "\n",
+    "    def __getitem__(self, index):\n",
+    "        # as each stimulus has been exposed for 15 seconds and the fMRI data is sampled every 1.5 seconds, we take 10 samples per stimulus\n",
+    "        voxels = self.voxels[index*10:(index+1)*10]\n",
+    "        embeddings = self.embeddings[index]\n",
+    "        return voxels, embeddings\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return self.len\n",
+    "    \n",
+    "class VoxelsEmbeddinsEncodecDataModule(L.LightningDataModule):\n",
+    "    def __init__(self, train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=4):\n",
+    "        super().__init__()\n",
+    "        self.train_voxels_path = train_voxels_path\n",
+    "        self.train_embeddings_path = train_embeddings_path\n",
+    "        self.test_voxels_path = test_voxels_path\n",
+    "        self.test_embeddings_path = test_embeddings_path\n",
+    "        self.batch_size = batch_size\n",
+    "\n",
+    "    def setup(self, stage=None):\n",
+    "        self.train_dataset = VoxelsDataset(self.train_voxels_path, self.train_embeddings_path)\n",
+    "        self.test_dataset = VoxelsDataset(self.test_voxels_path, self.test_embeddings_path)\n",
+    "\n",
+    "    def train_dataloader(self):\n",
+    "        return data.DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)\n",
+    "\n",
+    "    def val_dataloader(self):\n",
+    "        return data.DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_module_example = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_module_example.setup()\n",
+    "train_dataloader = data_module_example.train_dataloader()\n",
+    "val_dataset = data_module_example.val_dataloader()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(tensor([], size=(0, 60784)),\n",
+       " tensor([[ 302.,  244.,  660.,  854.,  660.,  480.,  854.,  618.,  618.,  854.,\n",
+       "           790.,  750.,  659.,   59.,  891.,  891.,  536.,  167.,  343.,  536.,\n",
+       "           715.,  758.,  758.,  758.,  480.,  498.,  854.,    4.,    4.,  308.,\n",
+       "           270.,  342.,  342.,  660.,  342.,  854.,  342.,  435.,  549.,  150.,\n",
+       "           631.,  485.,  844.,  366.,  266.,   35.,  847.,  667.,  862.,  109.,\n",
+       "           573.,  379.,  226.,  573.,  603.,  513.,  178.,  302.,  715.,  631.,\n",
+       "           342.,  258.,  244.,  302.,  715.,  854.,  854.,  294.,  366.,  660.,\n",
+       "           361.,  302.,  729.,  962.,  790.,  711.,  660.,  243.,  294.,  802.,\n",
+       "           329.,  513.,  962.,  342.,  711.,  244.,  243.,  549.,  802.,  854.,\n",
+       "           750.,   81.,  342.,  381.,  854.,  603.,  790.,  109.,  294.,  513.,\n",
+       "           419.,  485.,  504.,  660.,  361.,  790.,  790.,  167.,  802.,  246.,\n",
+       "           485.,  246.,   81., 1023.,  149.,   81.,  943.,  504.,  755.,  414.,\n",
+       "           246.,  972.,  715., 1023.,  790.,  692.,  790.,  572.,  504.,  302.,\n",
+       "           308.,  853.,  631.,  657.,  790.,  361.,  660.,  715.,  686.,  213.,\n",
+       "           226.,  187.,  586.,  361.,  485.,  790.,  729.,  951.,  962.,  485.],\n",
+       "         [ 963.,  645.,  645.,  326.,  138., 1013.,  680.,  525.,  411.,  102.,\n",
+       "           462.,  466.,  698.,  409.,  289.,  923.,  878.,  415.,  386.,  604.,\n",
+       "           975.,  162.,  603.,  284.,  233.,   75.,  244., 1016., 1016.,  242.,\n",
+       "            67.,  194.,  122.,  492.,  856.,  997.,  997.,  221.,  243.,  814.,\n",
+       "           386.,  598.,  317.,  166.,  583.,  439.,  654.,  430.,  201.,  160.,\n",
+       "           813.,  716.,  312.,  664.,  204.,  462.,  375.,  451.,   67.,  535.,\n",
+       "           854.,  209.,  548.,  812.,  657.,  827.,  408.,  411.,  422.,  352.,\n",
+       "            99.,  711.,  664.,  239.,  890.,  529.,  617.,  186.,  536.,  178.,\n",
+       "            29.,  930.,  187.,  973.,  354.,  450.,  468.,  273.,  995.,  653.,\n",
+       "           935.,  335.,  973.,  812.,  348.,  664.,  575.,  184.,  299.,  782.,\n",
+       "            36.,   29.,  641.,  653.,  105.,  958.,  653.,  828.,  981.,  218.,\n",
+       "          1021.,  381.,  356.,   35.,  416.,  675.,   45.,  839.,  690.,  331.,\n",
+       "           634.,  610.,  317.,  745.,  673.,  331.,  575.,   57.,  100.,  564.,\n",
+       "           590.,  492.,  902.,   53.,   73.,  332., 1005.,  395.,  679.,  781.,\n",
+       "           174.,   74.,  121.,  667.,  265.,  479.,  583.,  655.,  163.,   81.]]))"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "val_dataset.dataset[239]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MLP(L.LightningModule):\n",
+    "    def __init__(self, sizes, residual_conections, dropout):\n",
+    "        # sizes is a list of the sizes of the layers ej: [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]\n",
+    "        # residual_conections is a list with the same length as sizes, each element is a list of the indexes of the layers that will recieve the output of the layer as input, 0 means that the layer will recieve the x inputs ej. [[0], [1], [2,1], [3], [4,3], [5], [6,5], [7]]\n",
+    "        # dropout is a list with the same length as sizes, each element is the dropout probability of the layer ej. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]\n",
+    "        super().__init__()\n",
+    "        self.sizes = sizes\n",
+    "        self.residual_conections = residual_conections\n",
+    "        self.dropout = dropout\n",
+    "        self.layers = nn.Sequential()\n",
+    "        for i in range(len(sizes)-1):\n",
+    "            self.layers.add_module('linear'+str(i), nn.Linear(sizes[i], sizes[i+1]))\n",
+    "            self.layers.add_module('relu'+str(i), nn.ReLU())\n",
+    "            self.layers.add_module('dropout'+str(i), nn.Dropout(dropout[i]))\n",
+    "\n",
+    "        self.loss = nn.CrossEntropyLoss()\n",
+    "        self.test_outptus = []\n",
+    "        self.train_outptus = []\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.layers(x)\n",
+    "    \n",
+    "    def training_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]\n",
+    "        # flatten the embeddings to [batch_size, rest of the dimensions]\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long() # the size is [batch_size, 2250] \n",
+    "        #take just the first 200 embeddings\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        # keep only the first 2 of the 10 samples per stimulus and average them\n",
+    "        voxels = voxels[:, 0:2, :]\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        outputs = self(voxels)\n",
+    "        # the outputs are [batch_size, 200*1024], we need to reshape them to [batch_size, 1024, 200] (class scores on dim 1, as nn.CrossEntropyLoss expects)\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        acuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('train_loss', loss, sync_dist=True)\n",
+    "        self.log('train_accuracy', acuracy, sync_dist=True)\n",
+    "        discrete_outputs = outputs.argmax(dim=1)\n",
+    "        self.train_outptus.append(discrete_outputs)\n",
+    "        return loss\n",
+    "    \n",
+    "    def tokens_accuracy(self, outputs, embeddings):\n",
+    "        # outputs is [batch_size, 1024, 200]\n",
+    "        # embeddings is [batch_size, 200]\n",
+    "        # we need to get the index of the maximum value of each token\n",
+    "        outputs = outputs.argmax(dim=1)\n",
+    "        # now we need to compare the outputs with the embeddings\n",
+    "        return (outputs == embeddings).float().mean()\n",
+    "    \n",
+    "    def on_train_epoch_end(self):\n",
+    "        self.train_outptus = torch.cat(self.train_outptus)\n",
+    "        # save the outputs with the current epoch name\n",
+    "        torch.save(self.train_outptus, 'outputs_train'+str(self.current_epoch)+'.pt')\n",
+    "        self.train_outptus = []\n",
+    "    \n",
+    "    def on_validation_epoch_end(self):\n",
+    "        self.test_outptus = torch.cat(self.test_outptus)\n",
+    "        # save the outputs with the current epoch name\n",
+    "        torch.save(self.test_outptus, 'outputs_validation'+str(self.current_epoch)+'.pt')\n",
+    "        self.test_outptus = []\n",
+    "\n",
+    "    \n",
+    "    def validation_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long()\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        voxels = voxels[:, 0:2, :]\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1)\n",
+    "        outputs = self(voxels)\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        accuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('val_loss', loss, sync_dist=True)\n",
+    "        self.log('val_accuracy', accuracy, sync_dist=True)\n",
+    "        discrete_outputs = outputs.argmax(dim=1)\n",
+    "        self.test_outptus.append(discrete_outputs)\n",
+    "        return loss\n",
+    "    \n",
+    "        \n",
+    "    def configure_optimizers(self):\n",
+    "        return torch.optim.Adam(self.parameters(), lr=1e-6)\n",
+    "    \n",
+    "\n",
+    "# create the model; the last layer must emit 200*1024 values because training_step reshapes the output to [-1, 1024, 200]\n",
+    "sizes = [60784, 500, 500, 200*1024]\n",
+    "residual_conections = [[0], [1], [2], [3]]\n",
+    "dropout = [0.3, 0.3, 0.3, 0.3]\n",
+    "model = MLP(sizes, residual_conections, dropout)\n",
+    "\n",
+    "# create the data module\n",
+    "data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=4)\n",
+    "\n",
+    "wandb.finish()\n",
+    "\n",
+    "wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')\n",
+    "\n",
+    "# define the trainer\n",
+    "trainer = L.Trainer(devices=2, accelerator=\"gpu\", max_epochs=400, logger=wandb_logger, precision='16-mixed', log_every_n_steps=10)\n",
+    "\n",
+    "# train the model\n",
+    "trainer.fit(model, datamodule=data_module)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MLP(L.LightningModule):\n",
+    "    def __init__(self, sizes, residual_conections, dropout):\n",
+    "        # sizes is a list of the sizes of the layers ej: [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]\n",
+    "        # residual_conections is a list with the same length as sizes, each element is a list of the indexes of the layers that will recieve the output of the layer as input, 0 means that the layer will recieve the x inputs ej. [[0], [1], [2,1], [3], [4,3], [5], [6,5], [7]]\n",
+    "        # dropout is a list with the same length as sizes, each element is the dropout probability of the layer ej. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]\n",
+    "        super().__init__()\n",
+    "        self.sizes = sizes\n",
+    "        self.residual_conections = residual_conections\n",
+    "        self.dropout = dropout\n",
+    "        self.layers = nn.Sequential()\n",
+    "        for i in range(len(sizes)-1):\n",
+    "            self.layers.add_module('linear'+str(i), nn.Linear(sizes[i], sizes[i+1]))\n",
+    "            self.layers.add_module('relu'+str(i), nn.ReLU())\n",
+    "            self.layers.add_module('dropout'+str(i), nn.Dropout(dropout[i]))\n",
+    "\n",
+    "        self.loss = nn.MSELoss()\n",
+    "        self.test_outptus = []\n",
+    "        self.train_outptus = []\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.layers(x)\n",
+    "    \n",
+    "    def training_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]\n",
+    "        # flatten the embeddings to [batch_size, rest of the dimensions]\n",
+    "        embeddings = embeddings.flatten(start_dim=1) # the size is [batch_size, 2250]\n",
+    "        # take the mean of the second dimension of the voxels to get the mean of the 10 samples per stimulus\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        outputs = self(voxels)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        self.log('train_loss', loss)\n",
+    "        discrete_outputs = outputs.argmax(dim=1)\n",
+    "        self.train_outptus.append(discrete_outputs)\n",
+    "        return loss\n",
+    "    \n",
+    "    def on_train_epoch_end(self):\n",
+    "        self.train_outptus = torch.cat(self.train_outptus)\n",
+    "        # save the outputs with the current epoch name\n",
+    "        torch.save(self.train_outptus, 'outputs_train'+str(self.current_epoch)+'.pt')\n",
+    "        self.train_outptus = []\n",
+    "    \n",
+    "    def on_validation_epoch_end(self):\n",
+    "        self.test_outptus = torch.cat(self.test_outptus)\n",
+    "        # save the outputs with the current epoch name\n",
+    "        torch.save(self.test_outptus, 'outputs_validation'+str(self.current_epoch)+'.pt')\n",
+    "        self.test_outptus = []\n",
+    "\n",
+    "    def validation_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch\n",
+    "        embeddings = embeddings.flatten(start_dim=1)\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1)\n",
+    "        outputs = self(voxels)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        self.log('val_loss', loss)\n",
+    "        discrete_outputs = outputs.argmax(dim=1)\n",
+    "        self.test_outptus.append(discrete_outputs)\n",
+    "        return loss\n",
+    "    \n",
+    "        \n",
+    "    def configure_optimizers(self):\n",
+    "        return torch.optim.Adam(self.parameters(), lr=1e-5)\n",
+    "    \n",
+    "\n",
+    "# create the model\n",
+    "sizes = [60784, 1000, 1000, 150*2*1024]\n",
+    "residual_conections = [[0], [1], [2], [3]]\n",
+    "dropout = [0.5, 0.5, 0.5, 0.5]\n",
+    "model = MLP(sizes, residual_conections, dropout)\n",
+    "\n",
+    "# create the data module\n",
+    "data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=32)\n",
+    "\n",
+    "\n",
+    "wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')\n",
+    "\n",
+    "# define the trainer\n",
+    "trainer = L.Trainer(devices=2, accelerator=\"gpu\", max_epochs=400, logger=wandb_logger, precision='16-mixed', log_every_n_steps=10)\n",
+    "\n",
+    "# train the model\n",
+    "trainer.fit(model, datamodule=data_module)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MLP(L.LightningModule):\n",
+    "    def __init__(self, sizes, residual_conections, dropout):\n",
+    "        # sizes is a list of the sizes of the layers ej: [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]\n",
+    "        # residual_conections is a list with the same length as sizes, each element is a list of the indexes of the layers that will recieve the output of the layer as input, 0 means that the layer will recieve the x inputs ej. [[0], [1], [2,1], [3], [4,3], [5], [6,5], [7]]\n",
+    "        # dropout is a list with the same length as sizes, each element is the dropout probability of the layer ej. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]\n",
+    "        super().__init__()\n",
+    "        self.sizes = sizes\n",
+    "        self.residual_conections = residual_conections\n",
+    "        self.dropout = dropout\n",
+    "        self.layers = nn.Sequential()\n",
+    "        for i in range(len(sizes)-1):\n",
+    "            self.layers.add_module('linear'+str(i), nn.Linear(sizes[i], sizes[i+1]))\n",
+    "            self.layers.add_module('relu'+str(i), nn.ReLU())\n",
+    "            self.layers.add_module('dropout'+str(i), nn.Dropout(dropout[i]))\n",
+    "\n",
+    "        self.loss = nn.CrossEntropyLoss()\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.layers(x)\n",
+    "    \n",
+    "    def training_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]\n",
+    "        # flatten the embeddings to [batch_size, rest of the dimensions]\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long() # the size is [batch_size, 2250] \n",
+    "        #take just the first 200 embeddings\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        # take the mean of the second dimension of the voxels to get the mean of the 10 samples per stimulus\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        outputs = self(voxels)\n",
+    "        # the outputs are [batch_size, 200*1024], we need to reshape them to [batch_size, 1024, 200] (class scores on dim 1, as nn.CrossEntropyLoss expects)\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        acuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('train_loss', loss)\n",
+    "        self.log('train_accuracy', acuracy)\n",
+    "        return loss\n",
+    "    \n",
+    "    def tokens_accuracy(self, outputs, embeddings):\n",
+    "        # outputs is [batch_size, 1024, 200]\n",
+    "        # embeddings is [batch_size, 200]\n",
+    "        # we need to get the index of the maximum value of each token\n",
+    "        outputs = outputs.argmax(dim=1)\n",
+    "        # now we need to compare the outputs with the embeddings\n",
+    "        return (outputs == embeddings).float().mean()\n",
+    "\n",
+    "    \n",
+    "    def validation_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long()\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1)\n",
+    "        outputs = self(voxels)\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        accuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('val_loss', loss)\n",
+    "        self.log('val_accuracy', accuracy)\n",
+    "        return loss\n",
+    "    \n",
+    "        \n",
+    "    def configure_optimizers(self):\n",
+    "        return torch.optim.Adam(self.parameters(), lr=1e-5)\n",
+    "    \n",
+    "\n",
+    "# create the model\n",
+    "sizes = [60784, 1000, 1000, 200*1024]\n",
+    "residual_conections = [[0], [1], [2], [3]]\n",
+    "dropout = [0.5, 0.5, 0.5, 0.5]\n",
+    "model = MLP(sizes, residual_conections, dropout)\n",
+    "\n",
+    "# create the data module\n",
+    "data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=2)\n",
+    "\n",
+    "wandb.finish()\n",
+    "\n",
+    "wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')\n",
+    "\n",
+    "# define the trainer\n",
+    "trainer = L.Trainer(devices=2, accelerator=\"gpu\", max_epochs=400, logger=wandb_logger, precision='16-mixed', log_every_n_steps=10)\n",
+    "\n",
+    "# train the model\n",
+    "trainer.fit(model, datamodule=data_module)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MLP(L.LightningModule):\n",
+    "    def __init__(self, sizes, residual_conections, dropout):\n",
+    "        # sizes is a list of the sizes of the layers ej: [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]\n",
+    "        # residual_conections is a list with the same length as sizes, each element is a list of the indexes of the layers that will recieve the output of the layer as input, 0 means that the layer will recieve the x inputs ej. [[0], [1], [2,1], [3], [4,3], [5], [6,5], [7]]\n",
+    "        # dropout is a list with the same length as sizes, each element is the dropout probability of the layer ej. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]\n",
+    "        super().__init__()\n",
+    "        self.sizes = sizes\n",
+    "        self.residual_conections = residual_conections\n",
+    "        self.dropout = dropout\n",
+    "        self.layers = nn.Sequential()\n",
+    "        for i in range(len(sizes)-1):\n",
+    "            self.layers.add_module('linear'+str(i), nn.Linear(sizes[i], sizes[i+1]))\n",
+    "            self.layers.add_module('relu'+str(i), nn.ReLU())\n",
+    "            self.layers.add_module('dropout'+str(i), nn.Dropout(dropout[i]))\n",
+    "\n",
+    "        self.loss = nn.CrossEntropyLoss()\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.layers(x)\n",
+    "    \n",
+    "    def training_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]\n",
+    "        # flatten the embeddings to [batch_size, rest of the dimensions]\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long() # the size is [batch_size, 2250] \n",
+    "        #take just the first 200 embeddings\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        # use only the second of the 10 samples per stimulus (index 1) instead of averaging them\n",
+    "        voxels = voxels[:, 1, :]\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        outputs = self(voxels)\n",
+    "        # the outputs are [batch_size, 200*1024], we need to reshape them to [batch_size, 1024, 200] (class scores on dim 1, as nn.CrossEntropyLoss expects)\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        acuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('train_loss', loss)\n",
+    "        self.log('train_accuracy', acuracy)\n",
+    "        return loss\n",
+    "    \n",
+    "    def tokens_accuracy(self, outputs, embeddings):\n",
+    "        # outputs is [batch_size, 1024, 200]\n",
+    "        # embeddings is [batch_size, 200]\n",
+    "        # we need to get the index of the maximum value of each token\n",
+    "        outputs = outputs.argmax(dim=1)\n",
+    "        # now we need to compare the outputs with the embeddings\n",
+    "        return (outputs == embeddings).float().mean()\n",
+    "\n",
+    "    \n",
+    "    def validation_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long()\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        voxels = voxels[:, 1, :]\n",
+    "        voxels = voxels.flatten(start_dim=1)\n",
+    "        outputs = self(voxels)\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        accuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('val_loss', loss)\n",
+    "        self.log('val_accuracy', accuracy)\n",
+    "        return loss\n",
+    "    \n",
+    "        \n",
+    "    def configure_optimizers(self):\n",
+    "        return torch.optim.Adam(self.parameters(), lr=1e-6)\n",
+    "    \n",
+    "\n",
+    "# create the model\n",
+    "sizes = [60784, 1000, 1000, 200*1024]\n",
+    "residual_conections = [[0], [1], [2], [3]]\n",
+    "dropout = [0.2, 0.2, 0.2, 0.2]\n",
+    "model = MLP(sizes, residual_conections, dropout)\n",
+    "\n",
+    "# create the data module\n",
+    "data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=4)\n",
+    "\n",
+    "wandb.finish()\n",
+    "\n",
+    "wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')\n",
+    "\n",
+    "# define the trainer\n",
+    "trainer = L.Trainer(devices=2, accelerator=\"gpu\", max_epochs=400, logger=wandb_logger, precision='16-mixed', log_every_n_steps=10)\n",
+    "\n",
+    "# train the model\n",
+    "trainer.fit(model, datamodule=data_module)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model3.eval()\n",
+    "# gather one [batch, 200] tensor of predicted token ids per batch; torch.Tensor((480,200)) would build the 1-D tensor [480., 200.], not a 480x200 buffer\n",
+    "predicted_batches = []\n",
+    "with torch.no_grad():\n",
+    "    test_dataset = VoxelsDataset(test_voxels_path, test_embeddings_path)\n",
+    "    dataloader = data.DataLoader(test_dataset, batch_size = 2)\n",
+    "    for voxels, embeddings in dataloader:\n",
+    "        voxels = voxels[:, 1, :]\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, n_voxels]\n",
+    "        bout = model3(voxels)\n",
+    "        bout = bout.reshape(-1, 1024, 200)\n",
+    "        # dim 1 holds the 1024 scores of each position, so argmax over it picks the predicted token id\n",
+    "        bout = bout.argmax(dim=1)\n",
+    "        predicted_batches.append(bout)\n",
+    "# join the per-batch predictions into a single [n_samples, 200] tensor\n",
+    "outputs = torch.cat(predicted_batches)\n",
+    "# save the predicted outputs on the current directory\n",
+    "torch.save(outputs, 'outputs.pt')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "b2m",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

src/.ipynb_checkpoints/MLP-model-checkpoint.ipynb ADDED Viewed

	@@ -0,0 +1,449 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os, torch, torch.nn as nn, torch.utils.data as data, torchvision as tv\n",
+    "import lightning as L\n",
+    "import numpy as np, pandas as pd, matplotlib.pyplot as plt\n",
+    "from pytorch_lightning.loggers import WandbLogger"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create the datasets and dataloaders\n",
+    "train_voxels_path = '/home/ckadirt/brain2music/dataset/preproc/sub-001_Resp_Training.npy' # path to training voxels 65000 * 4800 \n",
+    "test_voxels_path = '/home/ckadirt/brain2music/dataset/preproc/sub-001_Resp_Test_Mean.npy' # path to test voxels 65000 * 600\n",
+    "\n",
+    "train_embeddings_path = '/home/ckadirt/brain2music/dataset/Gtanz/audios/sub-001/encodec_embeddings_train.pt' # path to training embeddings 480 * 2 * 1125\n",
+    "test_embeddings_path = '/home/ckadirt/brain2music/dataset/Gtanz/audios/sub-001/encodec_embeddings_test.pt' # path to test embeddings 600 * 2 * 1125\n",
+    "\n",
+    "class VoxelsDataset(data.Dataset):\n",
+    "    def __init__(self, voxels_path, embeddings_path):\n",
+    "        # transpose the two dimensions of the voxels data to match the embeddings data\n",
+    "        self.voxels = torch.from_numpy(np.load(voxels_path)).float().transpose(0, 1)\n",
+    "        self.embeddings = torch.load(embeddings_path)\n",
+    "        # as each stimulus has been exposed for 15 seconds and the fMRI data is sampled every 1.5 seconds, we take 10 samples per stimulus\n",
+    "        self.len = len(self.voxels) // 10\n",
+    "\n",
+    "    def __getitem__(self, index):\n",
+    "        # as each stimulus has been exposed for 15 seconds and the fMRI data is sampled every 1.5 seconds, we take 10 samples per stimulus\n",
+    "        voxels = self.voxels[index*10:(index+1)*10]\n",
+    "        embeddings = self.embeddings[index]\n",
+    "        return voxels, embeddings\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return self.len\n",
+    "    \n",
+    "class VoxelsEmbeddinsEncodecDataModule(L.LightningDataModule):\n",
+    "    def __init__(self, train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=32):\n",
+    "        super().__init__()\n",
+    "        self.train_voxels_path, self.train_embeddings_path = train_voxels_path, train_embeddings_path\n",
+    "        self.test_voxels_path, self.test_embeddings_path = test_voxels_path, test_embeddings_path\n",
+    "        self.batch_size = batch_size\n",
+    "\n",
+    "    def setup(self, stage=None):\n",
+    "        self.train_dataset = VoxelsDataset(self.train_voxels_path, self.train_embeddings_path)\n",
+    "        self.test_dataset = VoxelsDataset(self.test_voxels_path, self.test_embeddings_path)\n",
+    "\n",
+    "    def train_dataloader(self):\n",
+    "        return data.DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)\n",
+    "\n",
+    "    def test_dataloader(self):\n",
+    "        return data.DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)\n",
+    "    # fit() only runs validation_step when val_dataloader exists, so the test split doubles as the validation set\n",
+    "    val_dataloader = test_dataloader\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MLP(L.LightningModule):\n",
+    "    def __init__(self, sizes, residual_conections, dropout):\n",
+    "        # sizes is a list of the sizes of the layers ej: [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]\n",
+    "        # residual_conections is a list with the same length as sizes, each element is a list of the indexes of the layers that will recieve the output of the layer as input, 0 means that the layer will recieve the x inputs ej. [[0], [1], [2,1], [3], [4,3], [5], [6,5], [7]]\n",
+    "        # dropout is a list with the same length as sizes, each element is the dropout probability of the layer ej. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]\n",
+    "        super().__init__()\n",
+    "        self.sizes = sizes\n",
+    "        self.residual_conections = residual_conections\n",
+    "        self.dropout = dropout\n",
+    "        self.layers = nn.ModuleList()\n",
+    "        for i in range(len(sizes)-1):\n",
+    "            self.layers.append(nn.Linear(sizes[i], sizes[i+1]))\n",
+    "        self.relu = nn.ReLU()\n",
+    "        self.loss = nn.MSELoss()\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        x_states = [x]\n",
+    "        for i in range(len(self.layers)):\n",
+    "            x = self.layers[i](x)\n",
+    "            for j in self.residual_conections[i]:\n",
+    "                x = x + x_states[j]\n",
+    "            x = self.relu(x)\n",
+    "            # functional dropout follows self.training, so eval()/validation runs without dropout (a fresh nn.Dropout module is always in training mode)\n",
+    "            x = nn.functional.dropout(x, p=self.dropout[i], training=self.training)\n",
+    "            x_states.append(x)\n",
+    "        return x\n",
+    "    \n",
+    "    def training_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]\n",
+    "        # flatten the embeddings to [batch_size, rest of the dimensions]\n",
+    "        embeddings = embeddings.flatten(start_dim=1) # the size is [batch_size, 2250]\n",
+    "        # take the mean of the second dimension of the voxels to get the mean of the 10 samples per stimulus\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        outputs = self(voxels)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        self.log('train_loss', loss)\n",
+    "        return loss\n",
+    "    \n",
+    "    def validation_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch\n",
+    "        embeddings = embeddings.flatten(start_dim=1)\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1)\n",
+    "        outputs = self(voxels)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        self.log('val_loss', loss)\n",
+    "        return loss\n",
+    "    \n",
+    "        \n",
+    "    def configure_optimizers(self):\n",
+    "        return torch.optim.Adam(self.parameters(), lr=1e-3)\n",
+    "    \n",
+    "\n",
+    "# create the model\n",
+    "sizes = [60784, 1000, 1000, 2250]\n",
+    "residual_conections = [[0], [1], [2,1], [3]]\n",
+    "dropout = [0.5, 0.5, 0.5, 0.5]\n",
+    "model = MLP(sizes, residual_conections, dropout)\n",
+    "\n",
+    "# create the data module\n",
+    "data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=32)\n",
+    "\n",
+    "\n",
+    "wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')\n",
+    "\n",
+    "# define the trainer\n",
+    "trainer = L.Trainer(devices=2, accelerator=\"gpu\", max_epochs=100, logger=wandb_logger, precision='16-mixed')\n",
+    "\n",
+    "# train the model\n",
+    "trainer.fit(model, data_module)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MLP(L.LightningModule):\n",
+    "    def __init__(self, sizes, residual_conections, dropout):\n",
+    "        # sizes is a list of the sizes of the layers ej: [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]\n",
+    "        # residual_conections is a list with the same length as sizes, each element is a list of the indexes of the layers that will recieve the output of the layer as input, 0 means that the layer will recieve the x inputs ej. [[0], [1], [2,1], [3], [4,3], [5], [6,5], [7]]\n",
+    "        # dropout is a list with the same length as sizes, each element is the dropout probability of the layer ej. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]\n",
+    "        super().__init__()\n",
+    "        self.sizes = sizes\n",
+    "        self.residual_conections = residual_conections\n",
+    "        self.dropout = dropout\n",
+    "        self.layers = nn.Sequential()\n",
+    "        for i in range(len(sizes)-1):\n",
+    "            self.layers.add_module('linear'+str(i), nn.Linear(sizes[i], sizes[i+1]))\n",
+    "            self.layers.add_module('relu'+str(i), nn.ReLU())\n",
+    "            self.layers.add_module('dropout'+str(i), nn.Dropout(dropout[i]))\n",
+    "\n",
+    "        self.loss = nn.MSELoss()\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.layers(x)\n",
+    "    \n",
+    "    def training_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]\n",
+    "        # flatten the embeddings to [batch_size, rest of the dimensions]\n",
+    "        embeddings = embeddings.flatten(start_dim=1) # the size is [batch_size, 2250]\n",
+    "        # take the mean of the second dimension of the voxels to get the mean of the 10 samples per stimulus\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        outputs = self(voxels)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        self.log('train_loss', loss)\n",
+    "        return loss\n",
+    "    \n",
+    "    def validation_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch\n",
+    "        embeddings = embeddings.flatten(start_dim=1)\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1)\n",
+    "        outputs = self(voxels)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        self.log('val_loss', loss)\n",
+    "        return loss\n",
+    "    \n",
+    "        \n",
+    "    def configure_optimizers(self):\n",
+    "        return torch.optim.Adam(self.parameters(), lr=1e-5)\n",
+    "    \n",
+    "\n",
+    "# create the model\n",
+    "sizes = [60784, 1000, 1000, 2250]\n",
+    "residual_conections = [[0], [1], [2], [3]]\n",
+    "dropout = [0.5, 0.5, 0.5, 0.5]\n",
+    "model = MLP(sizes, residual_conections, dropout)\n",
+    "\n",
+    "# create the data module\n",
+    "data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=32)\n",
+    "\n",
+    "\n",
+    "wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')\n",
+    "\n",
+    "# define the trainer\n",
+    "trainer = L.Trainer(devices=2, accelerator=\"gpu\", max_epochs=400, logger=wandb_logger, precision='16-mixed', log_every_n_steps=10)\n",
+    "\n",
+    "# train the model\n",
+    "trainer.fit(model, datamodule=data_module)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MLP(L.LightningModule):\n",
+    "    def __init__(self, sizes, residual_conections, dropout):\n",
+    "        # sizes is a list of the sizes of the layers, e.g. [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]\n",
+    "        # residual_conections is a list with the same length as sizes; each element is a list of the indexes of the layers that will receive the output of the layer as input (0 means that the layer will receive the x inputs), e.g. [[0], [1], [2,1], [3], [4,3], [5], [6,5], [7]]. It is only stored for now: forward() does not use it\n",
+    "        # dropout is a list with the same length as sizes; each element is the dropout probability of the layer, e.g. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]\n",
+    "        super().__init__()\n",
+    "        self.sizes = sizes\n",
+    "        self.residual_conections = residual_conections\n",
+    "        self.dropout = dropout\n",
+    "        self.layers = nn.Sequential()\n",
+    "        for i in range(len(sizes)-1):\n",
+    "            self.layers.add_module('linear'+str(i), nn.Linear(sizes[i], sizes[i+1]))\n",
+    "            self.layers.add_module('relu'+str(i), nn.ReLU())\n",
+    "            self.layers.add_module('dropout'+str(i), nn.Dropout(dropout[i]))\n",
+    "\n",
+    "        self.loss = nn.CrossEntropyLoss()\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.layers(x)\n",
+    "    \n",
+    "    def training_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]\n",
+    "        # flatten the embeddings to [batch_size, rest of the dimensions]\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long() # the size is [batch_size, 2250] \n",
+    "        #take just the first 200 embeddings\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        # take the mean of the second dimension of the voxels to get the mean of the 10 samples per stimulus\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        outputs = self(voxels)\n",
+    "        # the outputs are [batch_size, 200*1024]; reshape them to [batch_size, 1024, 200] so the 1024 classes sit on dim 1, as CrossEntropyLoss expects\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        acuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('train_loss', loss)\n",
+    "        self.log('train_accuracy', acuracy)\n",
+    "        return loss\n",
+    "    \n",
+    "    def tokens_accuracy(self, outputs, embeddings):\n",
+    "        # outputs is [batch_size, 1024, 200]\n",
+    "        # embeddings is [batch_size, 200]\n",
+    "        # we need to get the index of the maximum value of each token\n",
+    "        outputs = outputs.argmax(dim=1)\n",
+    "        # now we need to compare the outputs with the embeddings\n",
+    "        return (outputs == embeddings).float().mean()\n",
+    "\n",
+    "    \n",
+    "    def validation_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long()\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1)\n",
+    "        outputs = self(voxels)\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        accuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('val_loss', loss)\n",
+    "        self.log('val_accuracy', accuracy)\n",
+    "        return loss\n",
+    "    \n",
+    "        \n",
+    "    def configure_optimizers(self):\n",
+    "        return torch.optim.Adam(self.parameters(), lr=1e-5)\n",
+    "    \n",
+    "\n",
+    "# create the model\n",
+    "sizes = [60784, 1000, 1000, 200*1024]\n",
+    "residual_conections = [[0], [1], [2], [3]]\n",
+    "dropout = [0.5, 0.5, 0.5, 0.5]\n",
+    "model = MLP(sizes, residual_conections, dropout)\n",
+    "\n",
+    "# create the data module\n",
+    "data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=2)\n",
+    "\n",
+    "wandb.finish()\n",
+    "\n",
+    "wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')\n",
+    "\n",
+    "# define the trainer\n",
+    "trainer = L.Trainer(devices=2, accelerator=\"gpu\", max_epochs=400, logger=wandb_logger, precision='16-mixed', log_every_n_steps=10)\n",
+    "\n",
+    "# train the model\n",
+    "trainer.fit(model, datamodule=data_module)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MLP(L.LightningModule):\n",
+    "    def __init__(self, sizes, residual_conections, dropout):\n",
+    "        # sizes is a list of the sizes of the layers, e.g. [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]\n",
+    "        # residual_conections is a list with the same length as sizes; each element is a list of the indexes of the layers that will receive the output of the layer as input (0 means that the layer will receive the x inputs), e.g. [[0], [1], [2,1], [3], [4,3], [5], [6,5], [7]]. It is only stored for now: forward() does not use it\n",
+    "        # dropout is a list with the same length as sizes; each element is the dropout probability of the layer, e.g. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]\n",
+    "        super().__init__()\n",
+    "        self.sizes = sizes\n",
+    "        self.residual_conections = residual_conections\n",
+    "        self.dropout = dropout\n",
+    "        self.layers = nn.Sequential()\n",
+    "        for i in range(len(sizes)-1):\n",
+    "            self.layers.add_module('linear'+str(i), nn.Linear(sizes[i], sizes[i+1]))\n",
+    "            self.layers.add_module('relu'+str(i), nn.ReLU())\n",
+    "            self.layers.add_module('dropout'+str(i), nn.Dropout(dropout[i]))\n",
+    "\n",
+    "        self.loss = nn.CrossEntropyLoss()\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.layers(x)\n",
+    "    \n",
+    "    def training_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]\n",
+    "        # flatten the embeddings to [batch_size, rest of the dimensions]\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long() # the size is [batch_size, 2250] \n",
+    "        #take just the first 200 embeddings\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        # keep only the second of the 10 fMRI samples per stimulus (index 1) instead of averaging them\n",
+    "        voxels = voxels[:, 1, :]\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        outputs = self(voxels)\n",
+    "        # the outputs are [batch_size, 200*1024]; reshape them to [batch_size, 1024, 200] so the 1024 classes sit on dim 1, as CrossEntropyLoss expects\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        acuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('train_loss', loss)\n",
+    "        self.log('train_accuracy', acuracy)\n",
+    "        return loss\n",
+    "    \n",
+    "    def tokens_accuracy(self, outputs, embeddings):\n",
+    "        # outputs is [batch_size, 1024, 200]\n",
+    "        # embeddings is [batch_size, 200]\n",
+    "        # we need to get the index of the maximum value of each token\n",
+    "        outputs = outputs.argmax(dim=1)\n",
+    "        # now we need to compare the outputs with the embeddings\n",
+    "        return (outputs == embeddings).float().mean()\n",
+    "\n",
+    "    \n",
+    "    def validation_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long()\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        voxels = voxels[:, 1, :]\n",
+    "        voxels = voxels.flatten(start_dim=1)\n",
+    "        outputs = self(voxels)\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        accuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('val_loss', loss)\n",
+    "        self.log('val_accuracy', accuracy)\n",
+    "        return loss\n",
+    "    \n",
+    "        \n",
+    "    def configure_optimizers(self):\n",
+    "        return torch.optim.Adam(self.parameters(), lr=1e-6)\n",
+    "    \n",
+    "\n",
+    "# create the model\n",
+    "sizes = [60784, 1000, 1000, 200*1024]\n",
+    "residual_conections = [[0], [1], [2], [3]]\n",
+    "dropout = [0.2, 0.2, 0.2, 0.2]\n",
+    "model = MLP(sizes, residual_conections, dropout)\n",
+    "\n",
+    "# create the data module\n",
+    "data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=4)\n",
+    "\n",
+    "wandb.finish()\n",
+    "\n",
+    "wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')\n",
+    "\n",
+    "# define the trainer\n",
+    "trainer = L.Trainer(devices=2, accelerator=\"gpu\", max_epochs=400, logger=wandb_logger, precision='16-mixed', log_every_n_steps=10)\n",
+    "\n",
+    "# train the model\n",
+    "trainer.fit(model, datamodule=data_module)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model3.eval()\n",
+    "# collect the predicted token ids batch by batch; torch.Tensor((480,200)) would build a 1-D tensor holding the two values 480 and 200, not a [480, 200] buffer\n",
+    "predictions = []\n",
+    "with torch.no_grad():\n",
+    "    test_dataset = VoxelsDataset(test_voxels_path, test_embeddings_path)\n",
+    "    dataloader = data.DataLoader(test_dataset, batch_size = 2)\n",
+    "    for voxels, embeddings in dataloader:\n",
+    "        # keep only the second of the 10 fMRI samples per stimulus (index 1)\n",
+    "        voxels = voxels[:, 1, :]\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        bout = model3(voxels)\n",
+    "        bout = bout.reshape(-1, 1024, 200)\n",
+    "        # dim 1 holds the 1024 candidate token values; argmax picks the predicted token id for each of the 200 positions\n",
+    "        bout = bout.argmax(dim=1)\n",
+    "        predictions.append(bout.cpu())\n",
+    "\n",
+    "# stack the per-batch predictions into one [n_test_items, 200] tensor of token ids\n",
+    "outputs = torch.cat(predictions)\n",
+    "\n",
+    "# save the predicted outputs on the current directory\n",
+    "torch.save(outputs, 'outputs.pt')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "b2m",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

src/.ipynb_checkpoints/MLPencoder-checkpoint.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

src/.ipynb_checkpoints/mlpdummy-checkpoint.py ADDED Viewed

	@@ -0,0 +1,146 @@

+import os, torch, torch.nn as nn, torch.utils.data as data, torchvision as tv
+import lightning as L
+import numpy as np, pandas as pd, matplotlib.pyplot as plt
+from pytorch_lightning.loggers import WandbLogger
+import wandb
+import pytorch_lightning as pl
+torch.set_float32_matmul_precision('medium')
+# create the datasets and dataloaders
+train_voxels_path = '/fsx/proj-fmri/ckadirt/b2m/data/sub-001_Resp_Training.npy' # path to training voxels 65000 * 4800
+test_voxels_path = '/fsx/proj-fmri/ckadirt/b2m/data/sub-001_Resp_Test_Mean.npy' # path to test voxels 65000 * 600
+train_embeddings_path = '/fsx/proj-fmri/ckadirt/b2m/data/encodec_training_embeds_sorted.npy' # path to training embeddings 480 * 2 * 1125
+test_embeddings_path = '/fsx/proj-fmri/ckadirt/b2m/data/encodec_testing_embeds_sorted.npy' # path to test embeddings 600 * 2 * 1125
+class VoxelsDataset(data.Dataset):
+    def __init__(self, voxels_path, embeddings_path):
+        # transpose the two dimensions of the voxels data to match the embeddings data
+        self.voxels = torch.from_numpy(np.load(voxels_path)).float().transpose(0, 1)
+        self.embeddings = torch.from_numpy(np.load(embeddings_path))
+        # as each stimulus has been exposed for 15 seconds and the fMRI data is sampled every 1.5 seconds, we take 10 samples per stimulus
+        self.len = len(self.voxels) // 10
+        print("The len is ", self.len  )
+    def __getitem__(self, index):
+        # as each stimulus has been exposed for 15 seconds and the fMRI data is sampled every 1.5 seconds, we take 10 samples per stimulus
+        voxels = self.voxels[index*10:(index+1)*10]
+        embeddings = self.embeddings[index]
+        return voxels, embeddings
+    def __len__(self):
+        return self.len
+class VoxelsEmbeddinsEncodecDataModule(pl.LightningDataModule):
+    def __init__(self, train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=8):
+        super().__init__()
+        self.train_voxels_path = train_voxels_path
+        self.train_embeddings_path = train_embeddings_path
+        self.test_voxels_path = test_voxels_path
+        self.test_embeddings_path = test_embeddings_path
+        self.batch_size = batch_size
+    def setup(self, stage=None):
+        self.train_dataset = VoxelsDataset(self.train_voxels_path, self.train_embeddings_path)
+        self.test_dataset = VoxelsDataset(self.test_voxels_path, self.test_embeddings_path)
+    def train_dataloader(self):
+        return data.DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)
+    def val_dataloader(self):
+        return data.DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)
+class MLP(pl.LightningModule):
+    def __init__(self, sizes, residual_conections, dropout):
+        # sizes is a list of the sizes of the layers ej: [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]
+        # residual_conections is a list with the same length as sizes, each element is a list of the indexes of the layers that will recieve the output of the layer as input, 0 means that the layer will recieve the x inputs ej. [[0], [1], [2,1], [3], [4,3], [5], [6,5], [7]]
+        # dropout is a list with the same length as sizes, each element is the dropout probability of the layer ej. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
+        super().__init__()
+        self.sizes = sizes
+        self.residual_conections = residual_conections
+        self.dropout = dropout
+        self.layers = nn.Sequential()
+        for i in range(len(sizes)-1):
+            self.layers.add_module('linear'+str(i), nn.Linear(sizes[i], sizes[i+1]))
+            self.layers.add_module('relu'+str(i), nn.ReLU())
+            self.layers.add_module('dropout'+str(i), nn.Dropout(dropout[i]))
+        self.loss = nn.CrossEntropyLoss(reduction='mean')
+    def forward(self, x):
+        return self.layers(x)
+    def training_step(self, batch, batch_idx):
+        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]
+        # flatten the voxels to [batch_size, rest of the dimensions]
+        embeddings = embeddings.flatten(start_dim=1).long() # the size is [batch_size, 2250]
+        #take just the first 200 embeddings
+        # embeddings = embeddings[:, :200]
+        # take the mean of the second dimension of the voxels to get the mean of the 10 samples per stimulus
+        voxels = voxels.mean(dim=1)
+        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]
+        outputs = self(voxels)
+        # the outputs are [batch_size, 200*1024], we need to reshape them to [batch_size, 200, 1024]
+        outputs = outputs.reshape(-1, 1024, 1125*2)
+        # avoid division by zero
+        outputs = outputs + 1e-6
+        #print(outputs.shape, embeddings.shape)
+        #print(outputs[0,0,:10], embeddings[0,:10])
+        loss = self.loss(outputs, embeddings)
+        #print(loss)
+        acuracy = self.tokens_accuracy(outputs, embeddings)
+        self.log('train_loss', loss)
+        self.log('train_accuracy', acuracy)
+        return loss
+    def tokens_accuracy(self, outputs, embeddings):
+        # outputs is [batch_size, 1024, 200]
+        # embeddings is [batch_size, 200]
+        # we need to get the index of the maximum value of each token
+        outputs = outputs.argmax(dim=1)
+        # now we need to compare the outputs with the embeddings
+        return (outputs == embeddings).float().mean()
+    def validation_step(self, batch, batch_idx):
+        voxels, embeddings = batch
+        embeddings = embeddings.flatten(start_dim=1).long()
+        #embeddings = embeddings[:, :200]
+        voxels = voxels.mean(dim=1)
+        voxels = voxels.flatten(start_dim=1)
+        outputs = self(voxels)
+        outputs = outputs.reshape(-1, 1024, 1125*2)
+        loss = self.loss(outputs, embeddings)
+        accuracy = self.tokens_accuracy(outputs, embeddings)
+        self.log('val_loss', loss)
+        self.log('val_accuracy', accuracy)
+        return loss
+    def configure_optimizers(self):
+        return torch.optim.Adam(self.trainer.model.parameters(), lr=2e-5, weight_decay=3e-3)
+# create the model
+sizes = [60784, 1000, 1000, 1125*2*1024]
+residual_conections = [[0], [1], [2], [3]]
+dropout = [0.5, 0.5, 0.5, 0.5]
+model = MLP(sizes, residual_conections, dropout)
+# create the data module
+data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=4)
+wandb.finish()
+from pytorch_lightning.strategies import DeepSpeedStrategy
+wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')
+# define the trainer
+trainer = pl.Trainer(accelerator="gpu", devices = [0,1,2,3,4,5,6,7], max_epochs=1000, logger=wandb_logger, precision='32', strategy=DeepSpeedStrategy(stage=3, logging_batch_size_per_gpu=8), enable_checkpointing=False, log_every_n_steps=10)
+#trainer = pl.Trainer(accelerator="gpu", devices = [0,1,2,3], max_epochs=1000, logger=wandb_logger, precision='bf16', strategy='fsdp', enable_checkpointing=False, log_every_n_steps=10)
+# train the model
+trainer.fit(model, datamodule=data_module)

src/.ipynb_checkpoints/musicgen_test copy-checkpoint.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

src/Copy_of_MusicGen.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

src/MLP-model copy.ipynb ADDED Viewed

	@@ -0,0 +1,581 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ckadirt/miniconda3/envs/b2m/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os, torch, torch.nn as nn, torch.utils.data as data, torchvision as tv\n",
+    "import lightning as L\n",
+    "import numpy as np, pandas as pd, matplotlib.pyplot as plt\n",
+    "from pytorch_lightning.loggers import WandbLogger"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create the datasets and dataloaders\n",
+    "train_voxels_path = '/home/ckadirt/brain2music/dataset/preproc/sub-001_Resp_Training.npy' # path to training voxels 65000 * 4800 \n",
+    "test_voxels_path = '/home/ckadirt/brain2music/dataset/preproc/sub-001_Resp_Test_Mean.npy' # path to test voxels 65000 * 600\n",
+    "\n",
+    "train_embeddings_path = '/home/ckadirt/brain2music/encodec_training_embeds_150.npy' # path to training embeddings 480 * 2 * 1125\n",
+    "test_embeddings_path = '/home/ckadirt/brain2music/encodec_test_embeds_150.npy' # path to test embeddings 600 * 2 * 1125\n",
+    "\n",
+    "class VoxelsDataset(data.Dataset):\n",
+    "    def __init__(self, voxels_path, embeddings_path):\n",
+    "        # transpose the two dimensions of the voxels data to match the embeddings data\n",
+    "        self.voxels = torch.from_numpy(np.load(voxels_path)).float().transpose(0, 1)\n",
+    "        self.embeddings = torch.from_numpy(np.load(embeddings_path))\n",
+    "        # as each stimulus has been exposed for 15 seconds and the fMRI data is sampled every 1.5 seconds, we take 10 samples per stimulus\n",
+    "        self.len = len(self.voxels) // 10\n",
+    "        print(\"The len is \", self.len  )\n",
+    "\n",
+    "    def __getitem__(self, index):\n",
+    "        # as each stimulus has been exposed for 15 seconds and the fMRI data is sampled every 1.5 seconds, we take 10 samples per stimulus\n",
+    "        voxels = self.voxels[index*10:(index+1)*10]\n",
+    "        embeddings = self.embeddings[index]\n",
+    "        return voxels, embeddings\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return self.len\n",
+    "    \n",
+    "class VoxelsEmbeddinsEncodecDataModule(L.LightningDataModule):\n",
+    "    def __init__(self, train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=4):\n",
+    "        super().__init__()\n",
+    "        self.train_voxels_path = train_voxels_path\n",
+    "        self.train_embeddings_path = train_embeddings_path\n",
+    "        self.test_voxels_path = test_voxels_path\n",
+    "        self.test_embeddings_path = test_embeddings_path\n",
+    "        self.batch_size = batch_size\n",
+    "\n",
+    "    def setup(self, stage=None):\n",
+    "        self.train_dataset = VoxelsDataset(self.train_voxels_path, self.train_embeddings_path)\n",
+    "        self.test_dataset = VoxelsDataset(self.test_voxels_path, self.test_embeddings_path)\n",
+    "\n",
+    "    def train_dataloader(self):\n",
+    "        return data.DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)\n",
+    "\n",
+    "    def val_dataloader(self):\n",
+    "        return data.DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_module_example = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_module_example.setup()\n",
+    "train_dataloader = data_module_example.train_dataloader()\n",
+    "val_dataset = data_module_example.val_dataloader()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(tensor([], size=(0, 60784)),\n",
+       " tensor([[ 302.,  244.,  660.,  854.,  660.,  480.,  854.,  618.,  618.,  854.,\n",
+       "           790.,  750.,  659.,   59.,  891.,  891.,  536.,  167.,  343.,  536.,\n",
+       "           715.,  758.,  758.,  758.,  480.,  498.,  854.,    4.,    4.,  308.,\n",
+       "           270.,  342.,  342.,  660.,  342.,  854.,  342.,  435.,  549.,  150.,\n",
+       "           631.,  485.,  844.,  366.,  266.,   35.,  847.,  667.,  862.,  109.,\n",
+       "           573.,  379.,  226.,  573.,  603.,  513.,  178.,  302.,  715.,  631.,\n",
+       "           342.,  258.,  244.,  302.,  715.,  854.,  854.,  294.,  366.,  660.,\n",
+       "           361.,  302.,  729.,  962.,  790.,  711.,  660.,  243.,  294.,  802.,\n",
+       "           329.,  513.,  962.,  342.,  711.,  244.,  243.,  549.,  802.,  854.,\n",
+       "           750.,   81.,  342.,  381.,  854.,  603.,  790.,  109.,  294.,  513.,\n",
+       "           419.,  485.,  504.,  660.,  361.,  790.,  790.,  167.,  802.,  246.,\n",
+       "           485.,  246.,   81., 1023.,  149.,   81.,  943.,  504.,  755.,  414.,\n",
+       "           246.,  972.,  715., 1023.,  790.,  692.,  790.,  572.,  504.,  302.,\n",
+       "           308.,  853.,  631.,  657.,  790.,  361.,  660.,  715.,  686.,  213.,\n",
+       "           226.,  187.,  586.,  361.,  485.,  790.,  729.,  951.,  962.,  485.],\n",
+       "         [ 963.,  645.,  645.,  326.,  138., 1013.,  680.,  525.,  411.,  102.,\n",
+       "           462.,  466.,  698.,  409.,  289.,  923.,  878.,  415.,  386.,  604.,\n",
+       "           975.,  162.,  603.,  284.,  233.,   75.,  244., 1016., 1016.,  242.,\n",
+       "            67.,  194.,  122.,  492.,  856.,  997.,  997.,  221.,  243.,  814.,\n",
+       "           386.,  598.,  317.,  166.,  583.,  439.,  654.,  430.,  201.,  160.,\n",
+       "           813.,  716.,  312.,  664.,  204.,  462.,  375.,  451.,   67.,  535.,\n",
+       "           854.,  209.,  548.,  812.,  657.,  827.,  408.,  411.,  422.,  352.,\n",
+       "            99.,  711.,  664.,  239.,  890.,  529.,  617.,  186.,  536.,  178.,\n",
+       "            29.,  930.,  187.,  973.,  354.,  450.,  468.,  273.,  995.,  653.,\n",
+       "           935.,  335.,  973.,  812.,  348.,  664.,  575.,  184.,  299.,  782.,\n",
+       "            36.,   29.,  641.,  653.,  105.,  958.,  653.,  828.,  981.,  218.,\n",
+       "          1021.,  381.,  356.,   35.,  416.,  675.,   45.,  839.,  690.,  331.,\n",
+       "           634.,  610.,  317.,  745.,  673.,  331.,  575.,   57.,  100.,  564.,\n",
+       "           590.,  492.,  902.,   53.,   73.,  332., 1005.,  395.,  679.,  781.,\n",
+       "           174.,   74.,  121.,  667.,  265.,  479.,  583.,  655.,  163.,   81.]]))"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "val_dataset.dataset[239]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MLP(L.LightningModule):\n",
+    "    def __init__(self, sizes, residual_conections, dropout):\n",
+    "        # sizes is a list of the sizes of the layers, e.g. [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]\n",
+    "        # residual_conections is a list with the same length as sizes; each element is a list of the indexes of the layers that will receive the output of the layer as input (0 means that the layer will receive the x inputs), e.g. [[0], [1], [2,1], [3], [4,3], [5], [6,5], [7]]. It is only stored for now: forward() does not use it\n",
+    "        # dropout is a list with the same length as sizes; each element is the dropout probability of the layer, e.g. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]\n",
+    "        super().__init__()\n",
+    "        self.sizes = sizes\n",
+    "        self.residual_conections = residual_conections\n",
+    "        self.dropout = dropout\n",
+    "        self.layers = nn.Sequential()\n",
+    "        for i in range(len(sizes)-1):\n",
+    "            self.layers.add_module('linear'+str(i), nn.Linear(sizes[i], sizes[i+1]))\n",
+    "            self.layers.add_module('relu'+str(i), nn.ReLU())\n",
+    "            self.layers.add_module('dropout'+str(i), nn.Dropout(dropout[i]))\n",
+    "\n",
+    "        self.loss = nn.CrossEntropyLoss()\n",
+    "        self.test_outptus = []\n",
+    "        self.train_outptus = []\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.layers(x)\n",
+    "    \n",
+    "    def training_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]\n",
+    "        # flatten the embeddings to [batch_size, rest of the dimensions]\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long() # the size is [batch_size, 2250] \n",
+    "        #take just the first 200 embeddings\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        # average the first two of the 10 fMRI samples per stimulus\n",
+    "        voxels = voxels[:, 0:2, :]\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        outputs = self(voxels)\n",
+    "        # reshape the flat outputs to [batch_size, 1024, 200] so the 1024 classes sit on dim 1, as CrossEntropyLoss expects; this assumes the last layer has 200*1024 units\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        acuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('train_loss', loss, sync_dist=True)\n",
+    "        self.log('train_accuracy', acuracy, sync_dist=True)\n",
+    "        discrete_outputs = outputs.argmax(dim=1)\n",
+    "        self.train_outptus.append(discrete_outputs)\n",
+    "        return loss\n",
+    "    \n",
+    "    def tokens_accuracy(self, outputs, embeddings):\n",
+    "        # outputs is [batch_size, 1024, 200]\n",
+    "        # embeddings is [batch_size, 200]\n",
+    "        # we need to get the index of the maximum value of each token\n",
+    "        outputs = outputs.argmax(dim=1)\n",
+    "        # now we need to compare the outputs with the embeddings\n",
+    "        return (outputs == embeddings).float().mean()\n",
+    "    \n",
+    "    def on_train_epoch_end(self):\n",
+    "        self.train_outptus = torch.cat(self.train_outptus)\n",
+    "        # save the outputs with the current epoch name\n",
+    "        torch.save(self.train_outptus, 'outputs_train'+str(self.current_epoch)+'.pt')\n",
+    "        self.train_outptus = []\n",
+    "    \n",
+    "    def on_validation_epoch_end(self):\n",
+    "        self.test_outptus = torch.cat(self.test_outptus)\n",
+    "        # save the outputs with the current epoch name\n",
+    "        torch.save(self.test_outptus, 'outputs_validation'+str(self.current_epoch)+'.pt')\n",
+    "        self.test_outptus = []\n",
+    "\n",
+    "    \n",
+    "    def validation_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long()\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        voxels = voxels[:, 0:2, :]\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1)\n",
+    "        outputs = self(voxels)\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        accuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('val_loss', loss, sync_dist=True)\n",
+    "        self.log('val_accuracy', accuracy, sync_dist=True)\n",
+    "        discrete_outputs = outputs.argmax(dim=1)\n",
+    "        self.test_outptus.append(discrete_outputs)\n",
+    "        return loss\n",
+    "    \n",
+    "        \n",
+    "    def configure_optimizers(self):\n",
+    "        return torch.optim.Adam(self.parameters(), lr=1e-6)\n",
+    "    \n",
+    "\n",
+    "# create the model\n",
+    "sizes = [60784, 500, 500, 150*1024]\n",
+    "residual_conections = [[0], [1], [2], [3]]\n",
+    "dropout = [0.3, 0.3, 0.3, 0.3]\n",
+    "model = MLP(sizes, residual_conections, dropout)\n",
+    "\n",
+    "# create the data module\n",
+    "data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=4)\n",
+    "\n",
+    "wandb.finish()\n",
+    "\n",
+    "wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')\n",
+    "\n",
+    "# define the trainer\n",
+    "trainer = L.Trainer(devices=2, accelerator=\"gpu\", max_epochs=400, logger=wandb_logger, precision='16-mixed', log_every_n_steps=10)\n",
+    "\n",
+    "# train the model\n",
+    "trainer.fit(model, datamodule=data_module)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MLP(L.LightningModule):\n",
+    "    def __init__(self, sizes, residual_conections, dropout):\n",
+    "        # sizes is a list of the sizes of the layers, e.g. [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]\n",
+    "        # residual_conections is a list with the same length as sizes, each element is a list of the indexes of the layers that will receive the output of the layer as input, 0 means that the layer will receive the x inputs, e.g. [[0], [1], [2,1], [3], [4,3], [5], [6,5], [7]]\n",
+    "        # dropout is a list with the same length as sizes, each element is the dropout probability of the layer, e.g. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]\n",
+    "        super().__init__()\n",
+    "        self.sizes = sizes\n",
+    "        self.residual_conections = residual_conections\n",
+    "        self.dropout = dropout\n",
+    "        self.layers = nn.Sequential()\n",
+    "        for i in range(len(sizes)-1):\n",
+    "            self.layers.add_module('linear'+str(i), nn.Linear(sizes[i], sizes[i+1]))\n",
+    "            self.layers.add_module('relu'+str(i), nn.ReLU())\n",
+    "            self.layers.add_module('dropout'+str(i), nn.Dropout(dropout[i]))\n",
+    "\n",
+    "        self.loss = nn.MSELoss()\n",
+    "        self.test_outptus = []\n",
+    "        self.train_outptus = []\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.layers(x)\n",
+    "    \n",
+    "    def training_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]\n",
+    "        # flatten the voxels to [batch_size, rest of the dimensions]\n",
+    "        embeddings = embeddings.flatten(start_dim=1) # the size is [batch_size, 2250]\n",
+    "        # take the mean of the second dimension of the voxels to get the mean of the 10 samples per stimulus\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        outputs = self(voxels)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        self.log('train_loss', loss)\n",
+    "        discrete_outputs = outputs.argmax(dim=1)\n",
+    "        self.train_outptus.append(discrete_outputs)\n",
+    "        return loss\n",
+    "    \n",
+    "    def on_train_epoch_end(self):\n",
+    "        self.train_outptus = torch.cat(self.train_outptus)\n",
+    "        # save the outputs with the current epoch name\n",
+    "        torch.save(self.train_outptus, 'outputs_train'+str(self.current_epoch)+'.pt')\n",
+    "        self.train_outptus = []\n",
+    "    \n",
+    "    def on_validation_epoch_end(self):\n",
+    "        self.test_outptus = torch.cat(self.test_outptus)\n",
+    "        # save the outputs with the current epoch name\n",
+    "        torch.save(self.test_outptus, 'outputs_validation'+str(self.current_epoch)+'.pt')\n",
+    "        self.test_outptus = []\n",
+    "\n",
+    "    def validation_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch\n",
+    "        embeddings = embeddings.flatten(start_dim=1)\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1)\n",
+    "        outputs = self(voxels)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        self.log('val_loss', loss)\n",
+    "        discrete_outputs = outputs.argmax(dim=1)\n",
+    "        self.test_outptus.append(discrete_outputs)\n",
+    "        return loss\n",
+    "    \n",
+    "        \n",
+    "    def configure_optimizers(self):\n",
+    "        return torch.optim.Adam(self.parameters(), lr=1e-5)\n",
+    "    \n",
+    "\n",
+    "# create the model\n",
+    "sizes = [60784, 1000, 1000, 150*2*1024]\n",
+    "residual_conections = [[0], [1], [2], [3]]\n",
+    "dropout = [0.5, 0.5, 0.5, 0.5]\n",
+    "model = MLP(sizes, residual_conections, dropout)\n",
+    "\n",
+    "# create the data module\n",
+    "data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=32)\n",
+    "\n",
+    "\n",
+    "wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')\n",
+    "\n",
+    "# define the trainer\n",
+    "trainer = L.Trainer(devices=2, accelerator=\"gpu\", max_epochs=400, logger=wandb_logger, precision='16-mixed', log_every_n_steps=10)\n",
+    "\n",
+    "# train the model\n",
+    "trainer.fit(model, datamodule=data_module)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MLP(L.LightningModule):\n",
+    "    def __init__(self, sizes, residual_conections, dropout):\n",
+    "        # sizes is a list of the sizes of the layers, e.g. [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]\n",
+    "        # residual_conections is a list with the same length as sizes, each element is a list of the indexes of the layers that will receive the output of the layer as input, 0 means that the layer will receive the x inputs, e.g. [[0], [1], [2,1], [3], [4,3], [5], [6,5], [7]]\n",
+    "        # dropout is a list with the same length as sizes, each element is the dropout probability of the layer, e.g. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]\n",
+    "        super().__init__()\n",
+    "        self.sizes = sizes\n",
+    "        self.residual_conections = residual_conections\n",
+    "        self.dropout = dropout\n",
+    "        self.layers = nn.Sequential()\n",
+    "        for i in range(len(sizes)-1):\n",
+    "            self.layers.add_module('linear'+str(i), nn.Linear(sizes[i], sizes[i+1]))\n",
+    "            self.layers.add_module('relu'+str(i), nn.ReLU())\n",
+    "            self.layers.add_module('dropout'+str(i), nn.Dropout(dropout[i]))\n",
+    "\n",
+    "        self.loss = nn.CrossEntropyLoss()\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.layers(x)\n",
+    "    \n",
+    "    def training_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]\n",
+    "        # flatten the voxels to [batch_size, rest of the dimensions]\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long() # the size is [batch_size, 2250] \n",
+    "        #take just the first 200 embeddings\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        # take the mean of the second dimension of the voxels to get the mean of the 10 samples per stimulus\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        outputs = self(voxels)\n",
+    "        # the outputs are [batch_size, 200*1024], we need to reshape them to [batch_size, 1024, 200] (class scores on dim 1, as nn.CrossEntropyLoss expects)\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        acuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('train_loss', loss)\n",
+    "        self.log('train_accuracy', acuracy)\n",
+    "        return loss\n",
+    "    \n",
+    "    def tokens_accuracy(self, outputs, embeddings):\n",
+    "        # outputs is [batch_size, 1024, 200]\n",
+    "        # embeddings is [batch_size, 200]\n",
+    "        # we need to get the index of the maximum value of each token\n",
+    "        outputs = outputs.argmax(dim=1)\n",
+    "        # now we need to compare the outputs with the embeddings\n",
+    "        return (outputs == embeddings).float().mean()\n",
+    "\n",
+    "    \n",
+    "    def validation_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long()\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1)\n",
+    "        outputs = self(voxels)\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        accuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('val_loss', loss)\n",
+    "        self.log('val_accuracy', accuracy)\n",
+    "        return loss\n",
+    "    \n",
+    "        \n",
+    "    def configure_optimizers(self):\n",
+    "        return torch.optim.Adam(self.parameters(), lr=1e-5)\n",
+    "    \n",
+    "\n",
+    "# create the model\n",
+    "sizes = [60784, 1000, 1000, 200*1024]\n",
+    "residual_conections = [[0], [1], [2], [3]]\n",
+    "dropout = [0.5, 0.5, 0.5, 0.5]\n",
+    "model = MLP(sizes, residual_conections, dropout)\n",
+    "\n",
+    "# create the data module\n",
+    "data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=2)\n",
+    "\n",
+    "wandb.finish()\n",
+    "\n",
+    "wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')\n",
+    "\n",
+    "# define the trainer\n",
+    "trainer = L.Trainer(devices=2, accelerator=\"gpu\", max_epochs=400, logger=wandb_logger, precision='16-mixed', log_every_n_steps=10)\n",
+    "\n",
+    "# train the model\n",
+    "trainer.fit(model, datamodule=data_module)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MLP(L.LightningModule):\n",
+    "    def __init__(self, sizes, residual_conections, dropout):\n",
+    "        # sizes is a list of the sizes of the layers, e.g. [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]\n",
+    "        # residual_conections is a list with the same length as sizes, each element is a list of the indexes of the layers that will receive the output of the layer as input, 0 means that the layer will receive the x inputs, e.g. [[0], [1], [2,1], [3], [4,3], [5], [6,5], [7]]\n",
+    "        # dropout is a list with the same length as sizes, each element is the dropout probability of the layer, e.g. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]\n",
+    "        super().__init__()\n",
+    "        self.sizes = sizes\n",
+    "        self.residual_conections = residual_conections\n",
+    "        self.dropout = dropout\n",
+    "        self.layers = nn.Sequential()\n",
+    "        for i in range(len(sizes)-1):\n",
+    "            self.layers.add_module('linear'+str(i), nn.Linear(sizes[i], sizes[i+1]))\n",
+    "            self.layers.add_module('relu'+str(i), nn.ReLU())\n",
+    "            self.layers.add_module('dropout'+str(i), nn.Dropout(dropout[i]))\n",
+    "\n",
+    "        self.loss = nn.CrossEntropyLoss()\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.layers(x)\n",
+    "    \n",
+    "    def training_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]\n",
+    "        # flatten the voxels to [batch_size, rest of the dimensions]\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long() # the size is [batch_size, 2250] \n",
+    "        #take just the first 200 embeddings\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        # take only the sample at index 1 of the second dimension of the voxels (a single sample per stimulus, no averaging)\n",
+    "        voxels = voxels[:, 1, :]\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        outputs = self(voxels)\n",
+    "        # the outputs are [batch_size, 200*1024], we need to reshape them to [batch_size, 1024, 200] (class scores on dim 1, as nn.CrossEntropyLoss expects)\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        acuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('train_loss', loss)\n",
+    "        self.log('train_accuracy', acuracy)\n",
+    "        return loss\n",
+    "    \n",
+    "    def tokens_accuracy(self, outputs, embeddings):\n",
+    "        # outputs is [batch_size, 1024, 200]\n",
+    "        # embeddings is [batch_size, 200]\n",
+    "        # we need to get the index of the maximum value of each token\n",
+    "        outputs = outputs.argmax(dim=1)\n",
+    "        # now we need to compare the outputs with the embeddings\n",
+    "        return (outputs == embeddings).float().mean()\n",
+    "\n",
+    "    \n",
+    "    def validation_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long()\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        voxels = voxels[:, 1, :]\n",
+    "        voxels = voxels.flatten(start_dim=1)\n",
+    "        outputs = self(voxels)\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        accuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('val_loss', loss)\n",
+    "        self.log('val_accuracy', accuracy)\n",
+    "        return loss\n",
+    "    \n",
+    "        \n",
+    "    def configure_optimizers(self):\n",
+    "        return torch.optim.Adam(self.parameters(), lr=1e-6)\n",
+    "    \n",
+    "\n",
+    "# create the model\n",
+    "sizes = [60784, 1000, 1000, 200*1024]\n",
+    "residual_conections = [[0], [1], [2], [3]]\n",
+    "dropout = [0.2, 0.2, 0.2, 0.2]\n",
+    "model = MLP(sizes, residual_conections, dropout)\n",
+    "\n",
+    "# create the data module\n",
+    "data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=4)\n",
+    "\n",
+    "wandb.finish()\n",
+    "\n",
+    "wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')\n",
+    "\n",
+    "# define the trainer\n",
+    "trainer = L.Trainer(devices=2, accelerator=\"gpu\", max_epochs=400, logger=wandb_logger, precision='16-mixed', log_every_n_steps=10)\n",
+    "\n",
+    "# train the model\n",
+    "trainer.fit(model, datamodule=data_module)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run model3 over the test set and store the predicted token ids.\n",
+    "model3.eval()\n",
+    "eval_batch_size = 2\n",
+    "test_dataset = VoxelsDataset(test_voxels_path, test_embeddings_path)\n",
+    "dataloader = data.DataLoader(test_dataset, batch_size=eval_batch_size)\n",
+    "# torch.Tensor((480, 200)) would build a 1-D tensor holding the two values 480 and 200,\n",
+    "# so allocate an explicit [num_stimuli, 200] integer buffer for the argmax indices instead\n",
+    "outputs = torch.zeros((len(test_dataset), 200), dtype=torch.long)\n",
+    "with torch.no_grad():\n",
+    "    for i, (voxels, embeddings) in enumerate(dataloader):\n",
+    "        # keep only the sample at index 1 of each stimulus\n",
+    "        voxels = voxels[:, 1, :]\n",
+    "        voxels = voxels.flatten(start_dim=1)\n",
+    "        bout = model3(voxels)\n",
+    "        bout = bout.reshape(-1, 1024, 200)\n",
+    "        # the 1024 dimension is the number of tokens, we need to get the index of the maximum value of each token\n",
+    "        bout = bout.argmax(dim=1)\n",
+    "        # write this batch into its own rows; the last batch may hold fewer than eval_batch_size items\n",
+    "        start = i * eval_batch_size\n",
+    "        outputs[start:start + bout.shape[0]] = bout\n",
+    "\n",
+    "# save the predicted outputs on the current directory\n",
+    "torch.save(outputs, 'outputs.pt')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

src/MLP-model.ipynb ADDED Viewed

	@@ -0,0 +1,448 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os, torch, torch.nn as nn, torch.utils.data as data, torchvision as tv\n",
+    "import lightning as L\n",
+    "import numpy as np, pandas as pd, matplotlib.pyplot as plt\n",
+    "from pytorch_lightning.loggers import WandbLogger"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create the datasets and dataloaders\n",
+    "train_voxels_path = '/home/ckadirt/brain2music/dataset/preproc/sub-001_Resp_Training.npy' # path to training voxels 65000 * 4800 \n",
+    "test_voxels_path = '/home/ckadirt/brain2music/dataset/preproc/sub-001_Resp_Test_Mean.npy' # path to test voxels 65000 * 600\n",
+    "\n",
+    "train_embeddings_path = '/home/ckadirt/brain2music/dataset/Gtanz/audios/sub-001/encodec_embeddings_train.pt' # path to training embeddings 480 * 2 * 1125\n",
+    "test_embeddings_path = '/home/ckadirt/brain2music/dataset/Gtanz/audios/sub-001/encodec_embeddings_test.pt' # path to test embeddings 600 * 2 * 1125\n",
+    "\n",
+    "class VoxelsDataset(data.Dataset):\n",
+    "    def __init__(self, voxels_path, embeddings_path):\n",
+    "        # transpose the two dimensions of the voxels data to match the embeddings data\n",
+    "        self.voxels = torch.from_numpy(np.load(voxels_path)).float().transpose(0, 1)\n",
+    "        self.embeddings = torch.load(embeddings_path)\n",
+    "        # as each stimulus has been exposed for 15 seconds and the fMRI data is sampled every 1.5 seconds, we take 10 samples per stimulus\n",
+    "        self.len = len(self.voxels) // 10\n",
+    "\n",
+    "    def __getitem__(self, index):\n",
+    "        # as each stimulus has been exposed for 15 seconds and the fMRI data is sampled every 1.5 seconds, we take 10 samples per stimulus\n",
+    "        voxels = self.voxels[index*10:(index+1)*10]\n",
+    "        embeddings = self.embeddings[index]\n",
+    "        return voxels, embeddings\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return self.len\n",
+    "    \n",
+    "class VoxelsEmbeddinsEncodecDataModule(L.LightningDataModule):\n",
+    "    # LightningDataModule that serves the (voxels, embeddings) pairs produced by VoxelsDataset\n",
+    "    # for the given train and test files, batched with batch_size\n",
+    "    def __init__(self, train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=32):\n",
+    "        super().__init__()\n",
+    "        self.train_voxels_path = train_voxels_path\n",
+    "        self.train_embeddings_path = train_embeddings_path\n",
+    "        self.test_voxels_path = test_voxels_path\n",
+    "        self.test_embeddings_path = test_embeddings_path\n",
+    "        self.batch_size = batch_size\n",
+    "\n",
+    "    def setup(self, stage=None):\n",
+    "        # build both datasets; the files are loaded inside VoxelsDataset.__init__\n",
+    "        self.train_dataset = VoxelsDataset(self.train_voxels_path, self.train_embeddings_path)\n",
+    "        self.test_dataset = VoxelsDataset(self.test_voxels_path, self.test_embeddings_path)\n",
+    "\n",
+    "    def train_dataloader(self):\n",
+    "        return data.DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)\n",
+    "\n",
+    "    def val_dataloader(self):\n",
+    "        # there is no separate validation split, so the test set is reused here; without this hook\n",
+    "        # trainer.fit() skips the validation loop and validation_step / val_loss are never run\n",
+    "        return data.DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)\n",
+    "\n",
+    "    def test_dataloader(self):\n",
+    "        return data.DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MLP(L.LightningModule):\n",
+    "    def __init__(self, sizes, residual_conections, dropout):\n",
+    "        # sizes is a list of the sizes of the layers, e.g. [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]\n",
+    "        # residual_conections is a list with the same length as sizes, each element is a list of the indexes of the layers that will receive the output of the layer as input, 0 means that the layer will receive the x inputs, e.g. [[0], [1], [2,1], [3], [4,3], [5], [6,5], [7]]\n",
+    "        # dropout is a list with the same length as sizes, each element is the dropout probability of the layer, e.g. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]\n",
+    "        super().__init__()\n",
+    "        self.sizes = sizes\n",
+    "        self.residual_conections = residual_conections\n",
+    "        self.dropout = dropout\n",
+    "        self.layers = nn.ModuleList()\n",
+    "        for i in range(len(sizes)-1):\n",
+    "            self.layers.append(nn.Linear(sizes[i], sizes[i+1]))\n",
+    "        self.relu = nn.ReLU()\n",
+    "        self.loss = nn.MSELoss()\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        # x_states[0] is the network input and x_states[k] is the output of layer k-1\n",
+    "        x_states = [x]\n",
+    "        for i, layer in enumerate(self.layers):\n",
+    "            # residual_conections[i] is read here as the list of states that feed layer i, so they are\n",
+    "            # summed to build the layer input; adding them to the layer output cannot work because\n",
+    "            # the output has sizes[i+1] features while the listed states have sizes[i] features\n",
+    "            layer_input = sum(x_states[j] for j in self.residual_conections[i])\n",
+    "            x = self.relu(layer(layer_input))\n",
+    "            # functional dropout follows self.training, so model.eval() switches it off;\n",
+    "            # a freshly constructed nn.Dropout module is always in training mode\n",
+    "            x = nn.functional.dropout(x, p=self.dropout[i], training=self.training)\n",
+    "            x_states.append(x)\n",
+    "\n",
+    "        return x\n",
+    "    \n",
+    "    def training_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]\n",
+    "        # flatten the voxels to [batch_size, rest of the dimensions]\n",
+    "        embeddings = embeddings.flatten(start_dim=1) # the size is [batch_size, 2250]\n",
+    "        # take the mean of the second dimension of the voxels to get the mean of the 10 samples per stimulus\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        outputs = self(voxels)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        self.log('train_loss', loss)\n",
+    "        return loss\n",
+    "    \n",
+    "    def validation_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch\n",
+    "        embeddings = embeddings.flatten(start_dim=1)\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1)\n",
+    "        outputs = self(voxels)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        self.log('val_loss', loss)\n",
+    "        return loss\n",
+    "    \n",
+    "        \n",
+    "    def configure_optimizers(self):\n",
+    "        return torch.optim.Adam(self.parameters(), lr=1e-3)\n",
+    "    \n",
+    "\n",
+    "# create the model\n",
+    "sizes = [60784, 1000, 1000, 2250]\n",
+    "residual_conections = [[0], [1], [2,1], [3]]\n",
+    "dropout = [0.5, 0.5, 0.5, 0.5]\n",
+    "model = MLP(sizes, residual_conections, dropout)\n",
+    "\n",
+    "# create the data module\n",
+    "data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=32)\n",
+    "\n",
+    "\n",
+    "wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')\n",
+    "\n",
+    "# define the trainer\n",
+    "trainer = L.Trainer(devices=2, accelerator=\"gpu\", max_epochs=100, logger=wandb_logger, precision='16-mixed')\n",
+    "\n",
+    "# train the model\n",
+    "trainer.fit(model, data_module)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MLP(L.LightningModule):\n",
+    "    def __init__(self, sizes, residual_conections, dropout):\n",
+    "        # sizes is a list of the sizes of the layers, e.g. [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]\n",
+    "        # residual_conections is a list with the same length as sizes, each element is a list of the indexes of the layers that will receive the output of the layer as input, 0 means that the layer will receive the x inputs, e.g. [[0], [1], [2,1], [3], [4,3], [5], [6,5], [7]]\n",
+    "        # dropout is a list with the same length as sizes, each element is the dropout probability of the layer, e.g. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]\n",
+    "        super().__init__()\n",
+    "        self.sizes = sizes\n",
+    "        self.residual_conections = residual_conections\n",
+    "        self.dropout = dropout\n",
+    "        self.layers = nn.Sequential()\n",
+    "        for i in range(len(sizes)-1):\n",
+    "            self.layers.add_module('linear'+str(i), nn.Linear(sizes[i], sizes[i+1]))\n",
+    "            self.layers.add_module('relu'+str(i), nn.ReLU())\n",
+    "            self.layers.add_module('dropout'+str(i), nn.Dropout(dropout[i]))\n",
+    "\n",
+    "        self.loss = nn.MSELoss()\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.layers(x)\n",
+    "    \n",
+    "    def training_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]\n",
+    "        # flatten the voxels to [batch_size, rest of the dimensions]\n",
+    "        embeddings = embeddings.flatten(start_dim=1) # the size is [batch_size, 2250]\n",
+    "        # take the mean of the second dimension of the voxels to get the mean of the 10 samples per stimulus\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        outputs = self(voxels)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        self.log('train_loss', loss)\n",
+    "        return loss\n",
+    "    \n",
+    "    def validation_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch\n",
+    "        embeddings = embeddings.flatten(start_dim=1)\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1)\n",
+    "        outputs = self(voxels)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        self.log('val_loss', loss)\n",
+    "        return loss\n",
+    "    \n",
+    "        \n",
+    "    def configure_optimizers(self):\n",
+    "        return torch.optim.Adam(self.parameters(), lr=1e-5)\n",
+    "    \n",
+    "\n",
+    "# create the model\n",
+    "sizes = [60784, 1000, 1000, 2250]\n",
+    "residual_conections = [[0], [1], [2], [3]]\n",
+    "dropout = [0.5, 0.5, 0.5, 0.5]\n",
+    "model = MLP(sizes, residual_conections, dropout)\n",
+    "\n",
+    "# create the data module\n",
+    "data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=32)\n",
+    "\n",
+    "\n",
+    "wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')\n",
+    "\n",
+    "# define the trainer\n",
+    "trainer = L.Trainer(devices=2, accelerator=\"gpu\", max_epochs=400, logger=wandb_logger, precision='16-mixed', log_every_n_steps=10)\n",
+    "\n",
+    "# train the model\n",
+    "trainer.fit(model, datamodule=data_module)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MLP(L.LightningModule):\n",
+    "    def __init__(self, sizes, residual_conections, dropout):\n",
+    "        # sizes is a list of the sizes of the layers, e.g. [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]\n",
+    "        # residual_conections is a list with the same length as sizes, each element is a list of the indexes of the layers that will receive the output of the layer as input, 0 means that the layer will receive the x inputs, e.g. [[0], [1], [2,1], [3], [4,3], [5], [6,5], [7]]\n",
+    "        # dropout is a list with the same length as sizes, each element is the dropout probability of the layer, e.g. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]\n",
+    "        super().__init__()\n",
+    "        self.sizes = sizes\n",
+    "        self.residual_conections = residual_conections\n",
+    "        self.dropout = dropout\n",
+    "        self.layers = nn.Sequential()\n",
+    "        for i in range(len(sizes)-1):\n",
+    "            self.layers.add_module('linear'+str(i), nn.Linear(sizes[i], sizes[i+1]))\n",
+    "            self.layers.add_module('relu'+str(i), nn.ReLU())\n",
+    "            self.layers.add_module('dropout'+str(i), nn.Dropout(dropout[i]))\n",
+    "\n",
+    "        self.loss = nn.CrossEntropyLoss()\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.layers(x)\n",
+    "    \n",
+    "    def training_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]\n",
+    "        # flatten the voxels to [batch_size, rest of the dimensions]\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long() # the size is [batch_size, 2250] \n",
+    "        #take just the first 200 embeddings\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        # take the mean of the second dimension of the voxels to get the mean of the 10 samples per stimulus\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        outputs = self(voxels)\n",
+    "        # the outputs are [batch_size, 200*1024], we need to reshape them to [batch_size, 1024, 200] (class scores on dim 1, as nn.CrossEntropyLoss expects)\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        acuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('train_loss', loss)\n",
+    "        self.log('train_accuracy', acuracy)\n",
+    "        return loss\n",
+    "    \n",
+    "    def tokens_accuracy(self, outputs, embeddings):\n",
+    "        # outputs is [batch_size, 1024, 200]\n",
+    "        # embeddings is [batch_size, 200]\n",
+    "        # we need to get the index of the maximum value of each token\n",
+    "        outputs = outputs.argmax(dim=1)\n",
+    "        # now we need to compare the outputs with the embeddings\n",
+    "        return (outputs == embeddings).float().mean()\n",
+    "\n",
+    "    \n",
+    "    def validation_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long()\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        voxels = voxels.mean(dim=1)\n",
+    "        voxels = voxels.flatten(start_dim=1)\n",
+    "        outputs = self(voxels)\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        accuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('val_loss', loss)\n",
+    "        self.log('val_accuracy', accuracy)\n",
+    "        return loss\n",
+    "    \n",
+    "        \n",
+    "    def configure_optimizers(self):\n",
+    "        return torch.optim.Adam(self.parameters(), lr=1e-5)\n",
+    "    \n",
+    "\n",
+    "# create the model\n",
+    "sizes = [60784, 1000, 1000, 200*1024]\n",
+    "residual_conections = [[0], [1], [2], [3]]\n",
+    "dropout = [0.5, 0.5, 0.5, 0.5]\n",
+    "model = MLP(sizes, residual_conections, dropout)\n",
+    "\n",
+    "# create the data module\n",
+    "data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=2)\n",
+    "\n",
+    "wandb.finish()\n",
+    "\n",
+    "wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')\n",
+    "\n",
+    "# define the trainer\n",
+    "trainer = L.Trainer(devices=2, accelerator=\"gpu\", max_epochs=400, logger=wandb_logger, precision='16-mixed', log_every_n_steps=10)\n",
+    "\n",
+    "# train the model\n",
+    "trainer.fit(model, datamodule=data_module)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MLP(L.LightningModule):\n",
+    "    # MLP that maps one flattened fMRI sample to logits over 1024 classes for each of 200 tokens\n",
+    "    def __init__(self, sizes, residual_conections, dropout):\n",
+    "        # sizes is a list of the sizes of the layers, e.g. [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]\n",
+    "        # residual_conections is only stored on the module: forward() runs a plain nn.Sequential and builds no residual paths\n",
+    "        # dropout holds one dropout probability per hidden layer, read as dropout[i] for linear layer i, e.g. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]\n",
+    "        super().__init__()\n",
+    "        self.sizes = sizes\n",
+    "        self.residual_conections = residual_conections\n",
+    "        self.dropout = dropout\n",
+    "        self.layers = nn.Sequential()\n",
+    "        n_linear = len(sizes) - 1\n",
+    "        for i in range(n_linear):\n",
+    "            self.layers.add_module('linear'+str(i), nn.Linear(sizes[i], sizes[i+1]))\n",
+    "            # the last linear layer emits the logits for CrossEntropyLoss and must stay raw:\n",
+    "            # a trailing ReLU clips negative logits and a trailing Dropout randomly zeroes class scores\n",
+    "            if i < n_linear - 1:\n",
+    "                self.layers.add_module('relu'+str(i), nn.ReLU())\n",
+    "                self.layers.add_module('dropout'+str(i), nn.Dropout(dropout[i]))\n",
+    "\n",
+    "        self.loss = nn.CrossEntropyLoss()\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.layers(x)\n",
+    "    \n",
+    "    def training_step(self, batch, batch_idx):\n",
+    "        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]\n",
+    "        # flatten the embeddings to [batch_size, rest of the dimensions]\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long() # the size is [batch_size, 2250] \n",
+    "        # take just the first 200 embeddings\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        # use only the fMRI sample at index 1 of dim 1 for each stimulus (this variant does not average the samples)\n",
+    "        voxels = voxels[:, 1, :]\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        outputs = self(voxels)\n",
+    "        # the outputs are [batch_size, 200*1024]; reshape them to [batch_size, 1024, 200] so the class dimension is dim 1, as CrossEntropyLoss expects\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        accuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        self.log('train_loss', loss)\n",
+    "        self.log('train_accuracy', accuracy)\n",
+    "        return loss\n",
+    "    \n",
+    "    def tokens_accuracy(self, outputs, embeddings):\n",
+    "        # outputs is [batch_size, 1024, 200]\n",
+    "        # embeddings is [batch_size, 200]\n",
+    "        # argmax over dim 1 (the 1024 class scores) gives the predicted id of each token\n",
+    "        outputs = outputs.argmax(dim=1)\n",
+    "        # fraction of token positions whose predicted id equals the target id\n",
+    "        return (outputs == embeddings).float().mean()\n",
+    "\n",
+    "    \n",
+    "    def validation_step(self, batch, batch_idx):\n",
+    "        # same preprocessing as training_step, evaluated on the validation batches\n",
+    "        voxels, embeddings = batch\n",
+    "        embeddings = embeddings.flatten(start_dim=1).long()\n",
+    "        embeddings = embeddings[:, :200]\n",
+    "        voxels = voxels[:, 1, :]\n",
+    "        voxels = voxels.flatten(start_dim=1)\n",
+    "        outputs = self(voxels)\n",
+    "        outputs = outputs.reshape(-1, 1024, 200)\n",
+    "        loss = self.loss(outputs, embeddings)\n",
+    "        accuracy = self.tokens_accuracy(outputs, embeddings)\n",
+    "        # sync_dist=True reduces the metric across the devices of the multi-GPU trainer below\n",
+    "        self.log('val_loss', loss, sync_dist=True)\n",
+    "        self.log('val_accuracy', accuracy, sync_dist=True)\n",
+    "        return loss\n",
+    "    \n",
+    "        \n",
+    "    def configure_optimizers(self):\n",
+    "        return torch.optim.Adam(self.parameters(), lr=1e-6)\n",
+    "    \n",
+    "\n",
+    "# create the model\n",
+    "sizes = [60784, 1000, 1000, 200*1024]\n",
+    "residual_conections = [[0], [1], [2], [3]]\n",
+    "dropout = [0.2, 0.2, 0.2, 0.2]\n",
+    "model = MLP(sizes, residual_conections, dropout)\n",
+    "\n",
+    "# create the data module\n",
+    "data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=4)\n",
+    "\n",
+    "wandb.finish()\n",
+    "\n",
+    "wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')\n",
+    "\n",
+    "# define the trainer\n",
+    "trainer = L.Trainer(devices=2, accelerator=\"gpu\", max_epochs=400, logger=wandb_logger, precision='16-mixed', log_every_n_steps=10)\n",
+    "\n",
+    "# train the model\n",
+    "trainer.fit(model, datamodule=data_module)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model3.eval()\n",
+    "test_dataset = VoxelsDataset(test_voxels_path, test_embeddings_path)\n",
+    "batch_size = 2\n",
+    "dataloader = data.DataLoader(test_dataset, batch_size=batch_size)\n",
+    "# one row of 200 predicted token ids per dataset item.\n",
+    "# torch.Tensor((480,200)) would build a 1-D tensor holding just the values 480 and 200,\n",
+    "# so the slice assignment below could never fit a [batch, 200] prediction into it\n",
+    "outputs = torch.zeros((len(test_dataset), 200), dtype=torch.long)\n",
+    "with torch.no_grad():\n",
+    "    for i, (voxels, embeddings) in enumerate(dataloader):\n",
+    "        voxels = voxels[:, 1, :] # keep only the fMRI sample at index 1 of dim 1\n",
+    "        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]\n",
+    "        bout = model3(voxels)\n",
+    "        bout = bout.reshape(-1, 1024, 200)\n",
+    "        # dim 1 holds the 1024 class scores of each token; argmax picks the predicted token id\n",
+    "        bout = bout.argmax(dim=1)\n",
+    "        # write this batch into its rows; the last batch may hold fewer than batch_size items\n",
+    "        start = i * batch_size\n",
+    "        outputs[start:start + bout.shape[0]] = bout.cpu()\n",
+    "\n",
+    "# save the predicted outputs on the current directory\n",
+    "torch.save(outputs, 'outputs.pt')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

src/MLPencoder.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

src/b2m-ckpt1 ADDED Viewed

Binary file (184 kB). View file

src/b2m-ckpt1.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:324b3fec6e3363f45b6e886219effec76e6a8a8b207cb6391eb7c0829b106484
+size 183603

src/mlpdummy.py ADDED Viewed

	@@ -0,0 +1,146 @@

+import os, torch, torch.nn as nn, torch.utils.data as data, torchvision as tv
+import lightning as L
+import numpy as np, pandas as pd, matplotlib.pyplot as plt
+from pytorch_lightning.loggers import WandbLogger
+import wandb
+import pytorch_lightning as pl
+# select the 'medium' float32 matmul precision mode (see the PyTorch docs for the precision/speed trade-off)
+torch.set_float32_matmul_precision('medium')
+# input files used by the datasets and dataloaders defined below
+# NOTE(review): the comments below state 65000 voxels, but the model at the end of this file is built with an input size of 60784 — confirm the real voxel count
+train_voxels_path = '/fsx/proj-fmri/ckadirt/b2m/data/sub-001_Resp_Training.npy' # path to training voxels 65000 * 4800
+test_voxels_path = '/fsx/proj-fmri/ckadirt/b2m/data/sub-001_Resp_Test_Mean.npy' # path to test voxels 65000 * 600
+train_embeddings_path = '/fsx/proj-fmri/ckadirt/b2m/data/encodec_training_embeds_sorted.npy' # path to training embeddings 480 * 2 * 1125
+test_embeddings_path = '/fsx/proj-fmri/ckadirt/b2m/data/encodec_testing_embeds_sorted.npy' # path to test embeddings 600 * 2 * 1125
+class VoxelsDataset(data.Dataset):
+    # Dataset that pairs each block of 10 consecutive fMRI samples with the embedding stored at the same index.
+    def __init__(self, voxels_path, embeddings_path):
+        # voxels_path / embeddings_path: .npy files, both loaded fully into memory here with np.load
+        # transpose the two dimensions of the voxels data to match the embeddings data
+        self.voxels = torch.from_numpy(np.load(voxels_path)).float().transpose(0, 1)
+        self.embeddings = torch.from_numpy(np.load(embeddings_path))
+        # as each stimulus has been exposed for 15 seconds and the fMRI data is sampled every 1.5 seconds, we take 10 samples per stimulus
+        # NOTE(review): the number of embeddings is never checked against this length — confirm both files list the same stimuli in the same order
+        self.len = len(self.voxels) // 10
+        print("The len is ", self.len  )
+    def __getitem__(self, index):
+        # returns (voxels, embeddings): rows index*10 .. index*10+9 of the transposed voxel matrix and embeddings[index]
+        # as each stimulus has been exposed for 15 seconds and the fMRI data is sampled every 1.5 seconds, we take 10 samples per stimulus
+        voxels = self.voxels[index*10:(index+1)*10]
+        embeddings = self.embeddings[index]
+        return voxels, embeddings
+    def __len__(self):
+        # number of complete 10-sample blocks computed in __init__
+        return self.len
+class VoxelsEmbeddinsEncodecDataModule(pl.LightningDataModule):
+    # LightningDataModule that wraps a training and a test VoxelsDataset.
+    # The test dataset is what val_dataloader() serves, so it acts as the validation set.
+    def __init__(self, train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=8):
+        # only stores the paths and the batch size; the files are read in setup()
+        super().__init__()
+        self.train_voxels_path = train_voxels_path
+        self.train_embeddings_path = train_embeddings_path
+        self.test_voxels_path = test_voxels_path
+        self.test_embeddings_path = test_embeddings_path
+        self.batch_size = batch_size
+    def setup(self, stage=None):
+        # builds both datasets on every call; the stage argument is ignored
+        self.train_dataset = VoxelsDataset(self.train_voxels_path, self.train_embeddings_path)
+        self.test_dataset = VoxelsDataset(self.test_voxels_path, self.test_embeddings_path)
+    def train_dataloader(self):
+        # shuffled batches over the training dataset
+        return data.DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)
+    def val_dataloader(self):
+        # unshuffled batches over the test dataset
+        return data.DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)
+class MLP(pl.LightningModule):
+    def __init__(self, sizes, residual_conections, dropout):
+        # sizes is a list of the sizes of the layers ej: [4800, 1000, 1000, 1000, 1000, 1000, 1000, 600]
+        # residual_conections is a list with the same length as sizes, each element is a list of the indexes of the layers that will recieve the output of the layer as input, 0 means that the layer will recieve the x inputs ej. [[0], [1], [2,1], [3], [4,3], [5], [6,5], [7]]
+        # dropout is a list with the same length as sizes, each element is the dropout probability of the layer ej. [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
+        super().__init__()
+        self.sizes = sizes
+        self.residual_conections = residual_conections
+        self.dropout = dropout
+        self.layers = nn.Sequential()
+        for i in range(len(sizes)-1):
+            self.layers.add_module('linear'+str(i), nn.Linear(sizes[i], sizes[i+1]))
+            self.layers.add_module('relu'+str(i), nn.ReLU())
+            self.layers.add_module('dropout'+str(i), nn.Dropout(dropout[i]))
+        self.loss = nn.CrossEntropyLoss(reduction='mean')
+    def forward(self, x):
+        return self.layers(x)
+    def training_step(self, batch, batch_idx):
+        voxels, embeddings = batch # the sizes are [batch_size, 10, 65000] and [batch_size, 2, 1125]
+        # flatten the voxels to [batch_size, rest of the dimensions]
+        embeddings = embeddings.flatten(start_dim=1).long() # the size is [batch_size, 2250]
+        #take just the first 200 embeddings
+        # embeddings = embeddings[:, :200]
+        # take the mean of the second dimension of the voxels to get the mean of the 10 samples per stimulus
+        voxels = voxels.mean(dim=1)
+        voxels = voxels.flatten(start_dim=1) # the size is [batch_size, 65000]
+        outputs = self(voxels)
+        # the outputs are [batch_size, 200*1024], we need to reshape them to [batch_size, 200, 1024]
+        outputs = outputs.reshape(-1, 1024, 1125*2)
+        # avoid division by zero
+        outputs = outputs + 1e-6
+        #print(outputs.shape, embeddings.shape)
+        #print(outputs[0,0,:10], embeddings[0,:10])
+        loss = self.loss(outputs, embeddings)
+        #print(loss)
+        acuracy = self.tokens_accuracy(outputs, embeddings)
+        self.log('train_loss', loss)
+        self.log('train_accuracy', acuracy)
+        return loss
+    def tokens_accuracy(self, outputs, embeddings):
+        # outputs is [batch_size, 1024, 200]
+        # embeddings is [batch_size, 200]
+        # we need to get the index of the maximum value of each token
+        outputs = outputs.argmax(dim=1)
+        # now we need to compare the outputs with the embeddings
+        return (outputs == embeddings).float().mean()
+    def validation_step(self, batch, batch_idx):
+        voxels, embeddings = batch
+        embeddings = embeddings.flatten(start_dim=1).long()
+        #embeddings = embeddings[:, :200]
+        voxels = voxels.mean(dim=1)
+        voxels = voxels.flatten(start_dim=1)
+        outputs = self(voxels)
+        outputs = outputs.reshape(-1, 1024, 1125*2)
+        loss = self.loss(outputs, embeddings)
+        accuracy = self.tokens_accuracy(outputs, embeddings)
+        self.log('val_loss', loss)
+        self.log('val_accuracy', accuracy)
+        return loss
+    def configure_optimizers(self):
+        return torch.optim.Adam(self.trainer.model.parameters(), lr=2e-5, weight_decay=3e-3)
+# create the model
+# NOTE(review): the last Linear maps 1000 -> 1125*2*1024 (2,304,000) units, i.e. about 2.3e9 weights — presumably why a sharded strategy is used below; confirm
+sizes = [60784, 1000, 1000, 1125*2*1024]
+# stored by MLP but not used in its forward pass
+residual_conections = [[0], [1], [2], [3]]
+# one probability per layer; entries beyond those MLP reads are ignored
+dropout = [0.5, 0.5, 0.5, 0.5]
+model = MLP(sizes, residual_conections, dropout)
+# create the data module
+data_module = VoxelsEmbeddinsEncodecDataModule(train_voxels_path, train_embeddings_path, test_voxels_path, test_embeddings_path, batch_size=4)
+# presumably closes a wandb run left open in this process before the new logger is created — confirm
+wandb.finish()
+from pytorch_lightning.strategies import DeepSpeedStrategy
+wandb_logger = WandbLogger(project='brain2music', entity='ckadirt')
+# define the trainer
+# 8 GPUs, DeepSpeed stage 3, full 32-bit precision, checkpointing disabled, metrics logged every 10 steps
+trainer = pl.Trainer(accelerator="gpu", devices = [0,1,2,3,4,5,6,7], max_epochs=1000, logger=wandb_logger, precision='32', strategy=DeepSpeedStrategy(stage=3, logging_batch_size_per_gpu=8), enable_checkpointing=False, log_every_n_steps=10)
+# alternative 4-GPU FSDP / bf16 configuration, kept for reference
+#trainer = pl.Trainer(accelerator="gpu", devices = [0,1,2,3], max_epochs=1000, logger=wandb_logger, precision='bf16', strategy='fsdp', enable_checkpointing=False, log_every_n_steps=10)
+# train the model
+trainer.fit(model, datamodule=data_module)

src/musicgen_test copy.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

src/musicgen_test.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

src/outputs_train0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4c329bb538c038cb0997e4d8a28c1194b985fe749d335acdd217d4584d5a5f33
+size 11505408

src/outputs_train1.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b943d8ad8171f749eb949a15b2b533febef14b63766a3822216ab6ca0136a7df
+size 11505408

src/outputs_train10.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:44220fbaaae452574cbbf3ee994930e1f82b71a80e87d413fa7c1e9e982ba628
+size 11505411

src/outputs_train11.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c724ecb56118655468b1f029e21b26bccfda1a2fbbaa22384c807e64ab04eba
+size 11505411

src/outputs_train12.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:471d0f66b3f174442b1001bdab8672f22d5f0a8958de547beb332ee7bf3326f2
+size 11505411

src/outputs_train13.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a25c407a91bdd02cbc45ab829c1bb41c74865611d2e0ccab2129f266c3002f3
+size 11505411

src/outputs_train14.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89b0eeb9b9aae498e1caaed017ee6d6f7d8819e388853fbb3f0f510b87b3f54b
+size 11505411

src/outputs_train15.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:21c8ee497ee20e58bbb164ae8bdece4d11fcc49b956d9024dd9d445acdd655cc
+size 11505411

src/outputs_train16.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc9c20f0192317d604a06ea1f0bece0e292c9c3b5d7fbe4f38baa048f2c21ea6
+size 11505411

src/outputs_train17.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bcf1d56c5e018539f8df4c27ceae6d4586f7f337597efee39575520c94bcef02
+size 11505411

src/outputs_train18.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f43266b7e5fd07645e2775b51133e13c052530715e0a94885bc44b5ad80943f
+size 11505411

src/outputs_train19.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:22e50667a061182431fb5b6e0f1d973942d813a399baad1fa2289197a4ba61e8
+size 11505411

src/outputs_train2.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31e397b8e34a6b00ce8ec2930269f78323b5a5717586a941d50fc6d7a11aab09
+size 11505408

src/outputs_train20.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06b1e7abd9058661a98adfc9416e59563bc2c81f5e722748c6a857b8e646dc44
+size 11505411

src/outputs_train21.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:700abd1ea9a42a56deea97287da1ed5991cdac64d3ff2b254c122025e02445de
+size 11505411

src/outputs_train22.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9eb089a91056c1c9fe77240d15c89298c9124e0f82a6c38d29c555cedeb025ec
+size 11505411

src/outputs_train23.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:47c6f65fc2cc3728caa62269ced13927b8d7de17190e2c9a0068a7d57181a1aa
+size 11505411

src/outputs_train24.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:30752d07d2360df5b65bc9ce02cfe388f85cdb4b68b966e4a701055ba4049025
+size 11505411