Wonder-Griffin committed (verified)
Commit 9a97de5 · 1 Parent(s): 1693c68

Update config.json

Files changed (1):
config.json +69 -43
config.json CHANGED
@@ -1,43 +1,69 @@
-{
-  "architectures": [
-    "JudgeModel"
-  ],
-  "batch_size": 32,
-  "bias": true,
-  "block_size": 512,
-  "dim_feedforward": 3072,
-  "dropout": 0.1,
-  "ff_expansion_factor": 4,
-  "hidden_act": "gelu",
-  "hidden_size": 768,
-  "id2label": {
-    "0": "LABEL_0",
-    "1": "LABEL_1",
-    "2": "LABEL_2",
-    "3": "LABEL_3",
-    "4": "LABEL_4"
-  },
-  "label2id": {
-    "LABEL_0": 0,
-    "LABEL_1": 1,
-    "LABEL_2": 2,
-    "LABEL_3": 3,
-    "LABEL_4": 4
-  },
-  "label_smoothing": 0.1,
-  "learning_rate": 0.0003,
-  "log_interval": 100,
-  "max_grad_norm": 1.0,
-  "max_position_embeddings": 512,
-  "model_type": "openai-gpt",
-  "n_embd": 768,
-  "n_head": 12,
-  "n_layer": 12,
-  "output_dir": "C:/Users/wonde/output",
-  "torch_dtype": "float32",
-  "total_steps": 10000,
-  "transformers_version": "4.43.3",
-  "vocab_size": 50257,
-  "warmup_steps": 1000,
-  "weight_decay": 0.01
-}
+{
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "inference_mode": true,
+  "batch_size": 32,
+  "bias": true,
+  "block_size": 512,
+  "dim_feedforward": 3072,
+  "dropout": 0.1,
+  "ff_expansion_factor": 4,
+  "hidden_act": "gelu",
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4"
+  },
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4
+  },
+  "label_smoothing": 0.1,
+  "learning_rate": 0.0003,
+  "log_interval": 100,
+  "max_grad_norm": 1.0,
+  "max_position_embeddings": 512,
+  "model_type": "gpt2",
+  "n_embd": 768,
+  "n_head": 12,
+  "n_layer": 12,
+  "output_dir": "C:/Users/wonde/output",
+  "torch_dtype": "float32",
+  "total_steps": 10000,
+  "transformers_version": "4.43.3",
+  "vocab_size": 50257,
+  "warmup_steps": 1000,
+  "weight_decay": 0.01,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_proj",
+    "c_attn",
+    "c_fc",
+    "score"
+  ],
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "task_type": "SEQ_CLS",
+  "use_dora": false,
+  "use_rslora": false
+}
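
Note on the merged keys: the updated config.json combines a standard GPT-2 model config (model_type, n_embd, n_head, n_layer, vocab_size, ...) with trainer-only hyperparameters (batch_size, learning_rate, total_steps, warmup_steps, output_dir, ...) and LoRA adapter fields (peft_type, r, target_modules, modules_to_save, ...) that peft normally keeps in a separate adapter_config.json. It also lists GPT2LMHeadModel under architectures while task_type is SEQ_CLS; the sketch below follows the classification fields. This is a minimal sketch of how these keys would typically be consumed, assuming the stock "gpt2" checkpoint as the base model and recent transformers/peft releases (both assumptions, not taken from this repo); transformers ignores the extra trainer-only keys when loading.

# A minimal sketch, assuming the stock "gpt2" checkpoint as the base model
# and recent transformers/peft releases; the checkpoint name is an
# assumption, not taken from this repo.
from peft import LoraConfig, get_peft_model
from transformers import AutoConfig, AutoModelForSequenceClassification

# The model-level keys (model_type, n_embd, n_head, n_layer, vocab_size, ...)
# are the ones transformers actually reads from config.json. num_labels=5
# matches the five id2label entries in the diff.
config = AutoConfig.from_pretrained("gpt2", num_labels=5)
config.pad_token_id = config.eos_token_id  # GPT-2 has no pad token; needed for batched classification

model = AutoModelForSequenceClassification.from_pretrained("gpt2", config=config)

# The PEFT keys in this diff (peft_type, r, target_modules, modules_to_save,
# task_type, use_dora, use_rslora) normally live in adapter_config.json;
# reconstructed here as a LoraConfig with the values from the update.
lora_config = LoraConfig(
    r=16,
    target_modules=["c_proj", "c_attn", "c_fc", "score"],
    modules_to_save=["classifier", "score"],
    task_type="SEQ_CLS",
    use_dora=False,
    use_rslora=False,
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

The trainer-only keys (batch_size, learning_rate, label_smoothing, max_grad_norm, total_steps, warmup_steps, weight_decay, log_interval, output_dir) are carried along in config.json but ignored by from_pretrained; in a typical transformers setup they would belong to a TrainingArguments object rather than the model config.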