Upload 17 files

Browse files

Files changed (17) hide show

added_tokens.json +25 -0
config.json +17 -0
generation_config.json +7 -0
merges.txt +0 -0
optimizer.pt +3 -0
pytorch_model-00001-of-00002.bin +3 -0
pytorch_model-00002-of-00002.bin +3 -0
pytorch_model.bin.index.json +0 -0
rng_state.pth +3 -0
scaler.pt +3 -0
scheduler.pt +3 -0
special_tokens_map.json +55 -0
tokenizer.json +0 -0
tokenizer_config.json +237 -0
trainer_state.json +32 -0
training_args.bin +3 -0
vocab.json +0 -0

added_tokens.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "</auditory_input>": 50271,
+  "</auditory_output>": 50277,
+  "</evaluate>": 50267,
+  "</internal_thinking>": 50261,
+  "</plan>": 50265,
+  "</reasoning>": 50263,
+  "</s>": 50259,
+  "</tactile_input>": 50273,
+  "</tactile_output>": 50279,
+  "</visual_input>": 50269,
+  "</visual_output>": 50275,
+  "<auditory_input>": 50270,
+  "<auditory_output>": 50276,
+  "<evaluate>": 50266,
+  "<internal_thinking>": 50260,
+  "<pad>": 50257,
+  "<plan>": 50264,
+  "<reasoning>": 50262,
+  "<s>": 50258,
+  "<tactile_input>": 50272,
+  "<tactile_output>": 50278,
+  "<visual_input>": 50268,
+  "<visual_output>": 50274
+}

config.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+  "adaptation_rate": 0.01,
+  "architectures": [
+    "DynamicNeuralNetwork"
+  ],
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "max_neurons": 8192,
+  "moe_load_balancing_weight": 0.01,
+  "moe_router_z_loss_weight": 0.001,
+  "moe_top_k": 4,
+  "num_moe_experts": 16,
+  "pad_token_id": 50256,
+  "reward_loss_weight": 0.1,
+  "torch_dtype": "float32",
+  "vocab_size": 50280
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "pad_token_id": 50256,
+  "transformers_version": "4.49.0"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ab620ccd1457defb275e2190c1b6b7946ad1d0f4bec20ad8400756b4e6ddfd19
+size 51624

pytorch_model-00001-of-00002.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:badfb4eb7df079642237e9f944cd0040da96808ebf1355095d146cff7087be7a
+size 5004049233

pytorch_model-00002-of-00002.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a171d6fe6d1d840e0fb7a23c03f0361b98d6fd23fb79c077c18efaf125037a40
+size 769579254

pytorch_model.bin.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9851ad32c4e0ad0da405a48c3ec889cc14dd3dc2c68a992facd9724babccbafe
+size 14244

scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bd88c1400734381e4f63d3ace0ea9a85534c65976e4e34bd6f4e74b7cb7c880b
+size 988

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:24712a3aafb7674c0670bb054afa3785b078b5ba01826f54d21063188b9ec55c
+size 1064

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "additional_special_tokens": [
+    "<pad>",
+    "<s>",
+    "</s>",
+    "<internal_thinking>",
+    "</internal_thinking>",
+    "<reasoning>",
+    "</reasoning>",
+    "<plan>",
+    "</plan>",
+    "<evaluate>",
+    "</evaluate>",
+    "<visual_input>",
+    "</visual_input>",
+    "<auditory_input>",
+    "</auditory_input>",
+    "<tactile_input>",
+    "</tactile_input>",
+    "<visual_output>",
+    "</visual_output>",
+    "<auditory_output>",
+    "</auditory_output>",
+    "<tactile_output>",
+    "</tactile_output>"
+  ],
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,237 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50257": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50258": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50259": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50260": {
+      "content": "<internal_thinking>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50261": {
+      "content": "</internal_thinking>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50262": {
+      "content": "<reasoning>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50263": {
+      "content": "</reasoning>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50264": {
+      "content": "<plan>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50265": {
+      "content": "</plan>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50266": {
+      "content": "<evaluate>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50267": {
+      "content": "</evaluate>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50268": {
+      "content": "<visual_input>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50269": {
+      "content": "</visual_input>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50270": {
+      "content": "<auditory_input>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50271": {
+      "content": "</auditory_input>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50272": {
+      "content": "<tactile_input>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50273": {
+      "content": "</tactile_input>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50274": {
+      "content": "<visual_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50275": {
+      "content": "</visual_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50276": {
+      "content": "<auditory_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50277": {
+      "content": "</auditory_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50278": {
+      "content": "<tactile_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50279": {
+      "content": "</tactile_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<pad>",
+    "<s>",
+    "</s>",
+    "<internal_thinking>",
+    "</internal_thinking>",
+    "<reasoning>",
+    "</reasoning>",
+    "<plan>",
+    "</plan>",
+    "<evaluate>",
+    "</evaluate>",
+    "<visual_input>",
+    "</visual_input>",
+    "<auditory_input>",
+    "</auditory_input>",
+    "<tactile_input>",
+    "</tactile_input>",
+    "<visual_output>",
+    "</visual_output>",
+    "<auditory_output>",
+    "</auditory_output>",
+    "<tactile_output>",
+    "</tactile_output>"
+  ],
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "max_length": 1024,
+  "model_max_length": 1024,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "left",
+  "stride": 0,
+  "tokenizer_class": "GPT2Tokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "<|endoftext|>"
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.016129032258064516,
+  "eval_steps": 1,
+  "global_step": 1,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [],
+  "logging_steps": 100,
+  "max_steps": 186,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 1,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 93822155378688.0,
+  "train_batch_size": 22,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:668b56f0110a8fec2ddec0e6330a327029f7602abb7dd83c5f4322fc84e2308f
+size 5304

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff