{ "architectures": [ "GPT2LMHeadModel" ], "inference_mode": true, "batch_size": 32, "bias": true, "block_size": 512, "dim_feedforward": 3072, "dropout": 0.1, "ff_expansion_factor": 4, "hidden_act": "gelu", "hidden_size": 768, "id2label": { "0": "LABEL_0", "1": "LABEL_1", "2": "LABEL_2", "3": "LABEL_3", "4": "LABEL_4" }, "label2id": { "LABEL_0": 0, "LABEL_1": 1, "LABEL_2": 2, "LABEL_3": 3, "LABEL_4": 4 }, "label_smoothing": 0.1, "learning_rate": 0.0003, "log_interval": 100, "max_grad_norm": 1.0, "max_position_embeddings": 512, "model_type": "gpt2", "n_embd": 768, "n_head": 12, "n_layer": 12, "output_dir": "C:/Users/wonde/output", "torch_dtype": "float32", "total_steps": 10000, "transformers_version": "4.43.3", "vocab_size": 50257, "warmup_steps": 1000, "weight_decay": 0.01, "megatron_config": null, "megatron_core": "megatron.core", "modules_to_save": [ "classifier", "score" ], "peft_type": "LORA", "r": 16, "rank_pattern": {}, "revision": null, "target_modules": [ "c_proj", "c_attn", "c_fc", "score" ], "task_specific_params": { "text-generation": { "do_sample": true, "max_length": 50 } }, "task_type": "SEQ_CLS", "use_dora": false, "use_rslora": false }