ShaoRun committed 23 days ago

Commit

79ba0db

verified ·

1 Parent(s): b595125

Upload 22 files

Browse files

Files changed (23) hide show

.gitattributes +1 -0
added_tokens.json +26 -0
config.json +224 -0
generation_config.json +4 -0
merges.txt +0 -0
model-00001-of-00009.safetensors +3 -0
model-00002-of-00009.safetensors +3 -0
model-00003-of-00009.safetensors +3 -0
model-00004-of-00009.safetensors +3 -0
model-00005-of-00009.safetensors +3 -0
model-00006-of-00009.safetensors +3 -0
model-00007-of-00009.safetensors +3 -0
model-00008-of-00009.safetensors +3 -0
model-00009-of-00009.safetensors +3 -0
model.safetensors.index.json +0 -0
preprocessor_config.json +24 -0
runs/Feb27_11-49-49_pytorch-f7e2cb9f-jqm28/events.out.tfevents.1740628401.pytorch-f7e2cb9f-jqm28.223.0 +3 -0
special_tokens_map.json +31 -0
tokenizer.json +3 -0
tokenizer_config.json +224 -0
trainer_state.json +819 -0
training_args.bin +3 -0
vocab.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

added_tokens.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|point_placeholder|>": 151666,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_placeholder|>": 151665,
+  "<|vision_start|>": 151652
+}

config.json ADDED Viewed

	@@ -0,0 +1,224 @@

+{
+  "_name_or_path": "/g0001sr/code/allsparkv2_250120/model_zoo/rotation_point_stage1/allsparkv2_qwen25_7B_vision_stage1_stage1_5_stage2_point_reusetext_stage1",
+  "add_moe": true,
+  "architectures": [
+    "AllSparkForCausalLM"
+  ],
+  "hidden_size": 3584,
+  "ignore_index": -100,
+  "initializer_range": 0.02,
+  "llm_config": {
+    "_attn_implementation_autoset": true,
+    "_name_or_path": "./model_weights/Qwen2.5-7B-Instruct",
+    "add_cross_attention": false,
+    "architectures": [
+      "Qwen2ForCausalLM"
+    ],
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": 151643,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 151645,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "silu",
+    "hidden_size": 3584,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_range": 0.02,
+    "intermediate_size": 18944,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 32768,
+    "max_window_layers": 28,
+    "min_length": 0,
+    "model_type": "qwen2",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 28,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_hidden_layers": 28,
+    "num_key_value_heads": 4,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "rms_norm_eps": 1e-06,
+    "rope_scaling": null,
+    "rope_theta": 1000000.0,
+    "sep_token_id": null,
+    "sliding_window": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": false,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": "bfloat16",
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "use_cache": true,
+    "use_sliding_window": false,
+    "vocab_size": 151667
+  },
+  "llm_name_or_path": "./model_weights/Qwen2.5-7B-Instruct",
+  "modal_configs": [
+    {
+      "encoder_cfg": {
+        "_attn_implementation_autoset": true,
+        "_name_or_path": "./model_weights/siglip-so400m-patch14-384",
+        "add_cross_attention": false,
+        "architectures": null,
+        "attention_dropout": 0.0,
+        "bad_words_ids": null,
+        "begin_suppress_tokens": null,
+        "bos_token_id": null,
+        "chunk_size_feed_forward": 0,
+        "cross_attention_hidden_size": null,
+        "decoder_start_token_id": null,
+        "diversity_penalty": 0.0,
+        "do_sample": false,
+        "early_stopping": false,
+        "encoder_no_repeat_ngram_size": 0,
+        "eos_token_id": null,
+        "exponential_decay_length_penalty": null,
+        "finetuning_task": null,
+        "forced_bos_token_id": null,
+        "forced_eos_token_id": null,
+        "hidden_act": "gelu_pytorch_tanh",
+        "hidden_size": 1152,
+        "id2label": {
+          "0": "LABEL_0",
+          "1": "LABEL_1"
+        },
+        "image_size": 384,
+        "intermediate_size": 4304,
+        "is_decoder": false,
+        "is_encoder_decoder": false,
+        "label2id": {
+          "LABEL_0": 0,
+          "LABEL_1": 1
+        },
+        "layer_norm_eps": 1e-06,
+        "length_penalty": 1.0,
+        "max_length": 20,
+        "min_length": 0,
+        "model_type": "siglip_vision_model",
+        "no_repeat_ngram_size": 0,
+        "num_attention_heads": 16,
+        "num_beam_groups": 1,
+        "num_beams": 1,
+        "num_channels": 3,
+        "num_hidden_layers": 27,
+        "num_return_sequences": 1,
+        "output_attentions": false,
+        "output_hidden_states": false,
+        "output_scores": false,
+        "pad_token_id": null,
+        "patch_size": 14,
+        "prefix": null,
+        "problem_type": null,
+        "pruned_heads": {},
+        "remove_invalid_values": false,
+        "repetition_penalty": 1.0,
+        "return_dict": true,
+        "return_dict_in_generate": false,
+        "sep_token_id": null,
+        "suppress_tokens": null,
+        "task_specific_params": null,
+        "temperature": 1.0,
+        "tf_legacy_loss": false,
+        "tie_encoder_decoder": false,
+        "tie_word_embeddings": true,
+        "tokenizer_class": null,
+        "top_k": 50,
+        "top_p": 1.0,
+        "torch_dtype": null,
+        "torchscript": false,
+        "transformers_version": "4.47.0",
+        "typical_p": 1.0,
+        "use_bfloat16": false
+      },
+      "modal_placeholder_token": "<|vision_placeholder|>",
+      "modal_placeholder_token_id": 151665,
+      "modal_tag": "vision",
+      "model_name_or_path": "./model_weights/siglip-so400m-patch14-384",
+      "multi_grid": false,
+      "proj_input_dim": 1152,
+      "proj_num_layers": 2,
+      "proj_output_dim": 3584
+    },
+    {
+      "depth": 24,
+      "drop_path_rate": 0.1,
+      "embed_dim": 1024,
+      "encoder_cfg": {
+        "depth": 24,
+        "drop_path_rate": 0.1,
+        "embed_dim": 1024,
+        "group_size": 32,
+        "img_queries": 13,
+        "large_embedding": false,
+        "mask_type": "rand",
+        "modal_placeholder_token": "<|point_placeholder|>",
+        "modal_tag": "point",
+        "model_path": "/g0001sr/code/allsparkv2_250120/model_weights/pointencoder/recon2_converted.pth",
+        "num_group": 512,
+        "num_heads": 16,
+        "output_dim": 3584,
+        "pretrained_model_name": "eva_large_patch14_336.in22k_ft_in22k_in1k",
+        "stop_grad": false,
+        "text_queries": 3,
+        "with_color": true
+      },
+      "group_size": 32,
+      "img_queries": 13,
+      "large_embedding": false,
+      "mask_type": "rand",
+      "modal_placeholder_token": "<|point_placeholder|>",
+      "modal_placeholder_token_id": 151666,
+      "modal_tag": "point",
+      "model_path": "/g0001sr/code/allsparkv2_250120/model_weights/pointencoder/recon2_converted.pth",
+      "num_group": 512,
+      "num_heads": 16,
+      "output_dim": 3584,
+      "pretrained_model_name": "eva_large_patch14_336.in22k_ft_in22k_in1k",
+      "stop_grad": false,
+      "text_queries": 3,
+      "with_color": true
+    }
+  ],
+  "model_type": "allspark",
+  "tokenizer_padding_side": "right",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.47.0"
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "_from_model_config": true,
+  "transformers_version": "4.47.0"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model-00001-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5826805e3b1d75407d67e6ad096b07ea4e0975b917db9149c4fd9dcca581c671
+size 4896281328

model-00002-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0ef127995c0061506e640b82d0037ad863fe0f6bd87855deec096080861039e5
+size 4987654672

model-00003-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c99767b312e82e63b114ecbf9eb965ea33595101b93d4810531b5700611889f6
+size 4928924360

model-00004-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6686fa68e751abaa7d3cfa98c2e2cf6544caa8dbea557d37d915f5d7e8ee42a0
+size 4987654712

model-00005-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a82561e925f91af1941d72d215e093081d2fdf05817956eec79740475476b8a
+size 4987654736

model-00006-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e4c2858316686cc1dcc13155b75253aa7ccf84b429b6aa6b8da455464822f992
+size 4987654736

model-00007-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0215c7bbca3e2fb2d35dcb3119523d268d99334090a19da1a2cf0ebd0c696204
+size 4987654736

model-00008-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5520bc80d6cd9e7d5fe52def3180c7cf49cdfdb80557692bc33bce6bab5a7e0
+size 4521544984

model-00009-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:062578ba94d997c28b697a1700ce2e458a288ae87c17c2a98d9a126b3ad8535c
+size 1087178048

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "do_convert_rgb": null,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "SiglipImageProcessor",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "processor_class": "SiglipProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 384,
+    "width": 384
+  }
+}

runs/Feb27_11-49-49_pytorch-f7e2cb9f-jqm28/events.out.tfevents.1740628401.pytorch-f7e2cb9f-jqm28.223.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5d88df915f5d264c8fafa27bf64c1aed6dab40de859619f8233d18f42ab9b29
+size 36200

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:48e7a0fe20cfac1091a6b008096ee61236548817eee5dc662f85fe9a55aaeb97
+size 11422293

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,224 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151665": {
+      "content": "<|vision_placeholder|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151666": {
+      "content": "<|point_placeholder|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 4096,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,819 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.999775734469612,
+  "eval_steps": 500,
+  "global_step": 2229,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.008970621215519175,
+      "grad_norm": 0.9008947014808655,
+      "learning_rate": 5.970149253731343e-06,
+      "loss": 3.6325,
+      "step": 20
+    },
+    {
+      "epoch": 0.01794124243103835,
+      "grad_norm": 0.5066778063774109,
+      "learning_rate": 1.1940298507462686e-05,
+      "loss": 2.6461,
+      "step": 40
+    },
+    {
+      "epoch": 0.026911863646557524,
+      "grad_norm": 0.5029802322387695,
+      "learning_rate": 1.791044776119403e-05,
+      "loss": 2.6127,
+      "step": 60
+    },
+    {
+      "epoch": 0.0358824848620767,
+      "grad_norm": 0.3778040111064911,
+      "learning_rate": 1.999821584672887e-05,
+      "loss": 2.5859,
+      "step": 80
+    },
+    {
+      "epoch": 0.04485310607759587,
+      "grad_norm": 0.3385876715183258,
+      "learning_rate": 1.998850515736159e-05,
+      "loss": 2.6276,
+      "step": 100
+    },
+    {
+      "epoch": 0.05382372729311505,
+      "grad_norm": 0.2863208055496216,
+      "learning_rate": 1.9970358823117534e-05,
+      "loss": 2.6081,
+      "step": 120
+    },
+    {
+      "epoch": 0.06279434850863422,
+      "grad_norm": 0.31687623262405396,
+      "learning_rate": 1.994379216921594e-05,
+      "loss": 2.5151,
+      "step": 140
+    },
+    {
+      "epoch": 0.0717649697241534,
+      "grad_norm": 0.27848467230796814,
+      "learning_rate": 1.990882763213298e-05,
+      "loss": 2.5367,
+      "step": 160
+    },
+    {
+      "epoch": 0.08073559093967257,
+      "grad_norm": 0.2719942629337311,
+      "learning_rate": 1.986549474065333e-05,
+      "loss": 2.6009,
+      "step": 180
+    },
+    {
+      "epoch": 0.08970621215519174,
+      "grad_norm": 0.28745323419570923,
+      "learning_rate": 1.98138300909321e-05,
+      "loss": 2.6414,
+      "step": 200
+    },
+    {
+      "epoch": 0.09867683337071093,
+      "grad_norm": 0.27338650822639465,
+      "learning_rate": 1.9753877315588072e-05,
+      "loss": 2.3677,
+      "step": 220
+    },
+    {
+      "epoch": 0.1076474545862301,
+      "grad_norm": 0.3499230742454529,
+      "learning_rate": 1.9685687046854415e-05,
+      "loss": 2.4714,
+      "step": 240
+    },
+    {
+      "epoch": 0.11661807580174927,
+      "grad_norm": 0.2322498857975006,
+      "learning_rate": 1.9609316873817992e-05,
+      "loss": 2.5646,
+      "step": 260
+    },
+    {
+      "epoch": 0.12558869701726844,
+      "grad_norm": 0.26533788442611694,
+      "learning_rate": 1.952483129378333e-05,
+      "loss": 2.511,
+      "step": 280
+    },
+    {
+      "epoch": 0.13455931823278763,
+      "grad_norm": 0.2915363013744354,
+      "learning_rate": 1.9432301657802378e-05,
+      "loss": 2.5829,
+      "step": 300
+    },
+    {
+      "epoch": 0.1435299394483068,
+      "grad_norm": 0.23644109070301056,
+      "learning_rate": 1.9331806110416027e-05,
+      "loss": 2.5513,
+      "step": 320
+    },
+    {
+      "epoch": 0.15250056066382597,
+      "grad_norm": 0.23692189157009125,
+      "learning_rate": 1.922342952365829e-05,
+      "loss": 2.4833,
+      "step": 340
+    },
+    {
+      "epoch": 0.16147118187934514,
+      "grad_norm": 0.22117172181606293,
+      "learning_rate": 1.9107263425378873e-05,
+      "loss": 2.5499,
+      "step": 360
+    },
+    {
+      "epoch": 0.1704418030948643,
+      "grad_norm": 0.2725989818572998,
+      "learning_rate": 1.8983405921944686e-05,
+      "loss": 2.4439,
+      "step": 380
+    },
+    {
+      "epoch": 0.17941242431038348,
+      "grad_norm": 0.22964678704738617,
+      "learning_rate": 1.8851961615385542e-05,
+      "loss": 2.5341,
+      "step": 400
+    },
+    {
+      "epoch": 0.18838304552590268,
+      "grad_norm": 0.22048306465148926,
+      "learning_rate": 1.8713041515054065e-05,
+      "loss": 2.5151,
+      "step": 420
+    },
+    {
+      "epoch": 0.19735366674142185,
+      "grad_norm": 0.2410048246383667,
+      "learning_rate": 1.8566762943874376e-05,
+      "loss": 2.4619,
+      "step": 440
+    },
+    {
+      "epoch": 0.20632428795694102,
+      "grad_norm": 0.2247011512517929,
+      "learning_rate": 1.8413249439258743e-05,
+      "loss": 2.5112,
+      "step": 460
+    },
+    {
+      "epoch": 0.2152949091724602,
+      "grad_norm": 0.25305867195129395,
+      "learning_rate": 1.8252630648775874e-05,
+      "loss": 2.5259,
+      "step": 480
+    },
+    {
+      "epoch": 0.22426553038797936,
+      "grad_norm": 0.23119617998600006,
+      "learning_rate": 1.8085042220658993e-05,
+      "loss": 2.488,
+      "step": 500
+    },
+    {
+      "epoch": 0.23323615160349853,
+      "grad_norm": 0.2174287885427475,
+      "learning_rate": 1.791062568924609e-05,
+      "loss": 2.491,
+      "step": 520
+    },
+    {
+      "epoch": 0.24220677281901773,
+      "grad_norm": 0.22464613616466522,
+      "learning_rate": 1.7729528355449214e-05,
+      "loss": 2.4441,
+      "step": 540
+    },
+    {
+      "epoch": 0.25117739403453687,
+      "grad_norm": 0.2646411657333374,
+      "learning_rate": 1.7541903162353638e-05,
+      "loss": 2.4999,
+      "step": 560
+    },
+    {
+      "epoch": 0.26014801525005604,
+      "grad_norm": 0.24847449362277985,
+      "learning_rate": 1.734790856605204e-05,
+      "loss": 2.4666,
+      "step": 580
+    },
+    {
+      "epoch": 0.26911863646557527,
+      "grad_norm": 0.20716962218284607,
+      "learning_rate": 1.714770840182273e-05,
+      "loss": 2.4222,
+      "step": 600
+    },
+    {
+      "epoch": 0.27808925768109444,
+      "grad_norm": 0.24155037105083466,
+      "learning_rate": 1.6941471745764996e-05,
+      "loss": 2.4417,
+      "step": 620
+    },
+    {
+      "epoch": 0.2870598788966136,
+      "grad_norm": 0.2298847883939743,
+      "learning_rate": 1.672937277200837e-05,
+      "loss": 2.5199,
+      "step": 640
+    },
+    {
+      "epoch": 0.2960305001121328,
+      "grad_norm": 0.22792883217334747,
+      "learning_rate": 1.6511590605616423e-05,
+      "loss": 2.4298,
+      "step": 660
+    },
+    {
+      "epoch": 0.30500112132765195,
+      "grad_norm": 0.2478325068950653,
+      "learning_rate": 1.628830917130935e-05,
+      "loss": 2.494,
+      "step": 680
+    },
+    {
+      "epoch": 0.3139717425431711,
+      "grad_norm": 0.22993171215057373,
+      "learning_rate": 1.6059717038133038e-05,
+      "loss": 2.5366,
+      "step": 700
+    },
+    {
+      "epoch": 0.3229423637586903,
+      "grad_norm": 0.23914852738380432,
+      "learning_rate": 1.5826007260205868e-05,
+      "loss": 2.4151,
+      "step": 720
+    },
+    {
+      "epoch": 0.33191298497420946,
+      "grad_norm": 0.2367142289876938,
+      "learning_rate": 1.5587377213677705e-05,
+      "loss": 2.3964,
+      "step": 740
+    },
+    {
+      "epoch": 0.3408836061897286,
+      "grad_norm": 0.25746768712997437,
+      "learning_rate": 1.5344028430038764e-05,
+      "loss": 2.4184,
+      "step": 760
+    },
+    {
+      "epoch": 0.3498542274052478,
+      "grad_norm": 0.23596778512001038,
+      "learning_rate": 1.5096166425919176e-05,
+      "loss": 2.4126,
+      "step": 780
+    },
+    {
+      "epoch": 0.35882484862076697,
+      "grad_norm": 0.22444961965084076,
+      "learning_rate": 1.4844000529522942e-05,
+      "loss": 2.4106,
+      "step": 800
+    },
+    {
+      "epoch": 0.36779546983628614,
+      "grad_norm": 0.2117769718170166,
+      "learning_rate": 1.458774370384287e-05,
+      "loss": 2.4786,
+      "step": 820
+    },
+    {
+      "epoch": 0.37676609105180536,
+      "grad_norm": 0.19232399761676788,
+      "learning_rate": 1.4327612366805832e-05,
+      "loss": 2.436,
+      "step": 840
+    },
+    {
+      "epoch": 0.38573671226732453,
+      "grad_norm": 0.2247142344713211,
+      "learning_rate": 1.4063826208500182e-05,
+      "loss": 2.5193,
+      "step": 860
+    },
+    {
+      "epoch": 0.3947073334828437,
+      "grad_norm": 0.24078157544136047,
+      "learning_rate": 1.3796608005639738e-05,
+      "loss": 2.5,
+      "step": 880
+    },
+    {
+      "epoch": 0.4036779546983629,
+      "grad_norm": 0.2095336616039276,
+      "learning_rate": 1.352618343342098e-05,
+      "loss": 2.4365,
+      "step": 900
+    },
+    {
+      "epoch": 0.41264857591388204,
+      "grad_norm": 0.2053331881761551,
+      "learning_rate": 1.3252780874932395e-05,
+      "loss": 2.4161,
+      "step": 920
+    },
+    {
+      "epoch": 0.4216191971294012,
+      "grad_norm": 0.20598524808883667,
+      "learning_rate": 1.2976631228276894e-05,
+      "loss": 2.4314,
+      "step": 940
+    },
+    {
+      "epoch": 0.4305898183449204,
+      "grad_norm": 0.2612752318382263,
+      "learning_rate": 1.2697967711570243e-05,
+      "loss": 2.3568,
+      "step": 960
+    },
+    {
+      "epoch": 0.43956043956043955,
+      "grad_norm": 0.18916811048984528,
+      "learning_rate": 1.2417025665980114e-05,
+      "loss": 2.4058,
+      "step": 980
+    },
+    {
+      "epoch": 0.4485310607759587,
+      "grad_norm": 0.23678423464298248,
+      "learning_rate": 1.2134042356972175e-05,
+      "loss": 2.4794,
+      "step": 1000
+    },
+    {
+      "epoch": 0.4575016819914779,
+      "grad_norm": 0.204436257481575,
+      "learning_rate": 1.1849256773931058e-05,
+      "loss": 2.3,
+      "step": 1020
+    },
+    {
+      "epoch": 0.46647230320699706,
+      "grad_norm": 0.20532362163066864,
+      "learning_rate": 1.156290942832536e-05,
+      "loss": 2.3845,
+      "step": 1040
+    },
+    {
+      "epoch": 0.47544292442251623,
+      "grad_norm": 0.2221594899892807,
+      "learning_rate": 1.1275242150587254e-05,
+      "loss": 2.4282,
+      "step": 1060
+    },
+    {
+      "epoch": 0.48441354563803546,
+      "grad_norm": 0.23072290420532227,
+      "learning_rate": 1.0986497885878145e-05,
+      "loss": 2.3869,
+      "step": 1080
+    },
+    {
+      "epoch": 0.49338416685355463,
+      "grad_norm": 0.20840080082416534,
+      "learning_rate": 1.0696920488912923e-05,
+      "loss": 2.4322,
+      "step": 1100
+    },
+    {
+      "epoch": 0.5023547880690737,
+      "grad_norm": 0.23384802043437958,
+      "learning_rate": 1.0406754518016047e-05,
+      "loss": 2.506,
+      "step": 1120
+    },
+    {
+      "epoch": 0.5113254092845929,
+      "grad_norm": 0.22102180123329163,
+      "learning_rate": 1.0116245028583418e-05,
+      "loss": 2.4869,
+      "step": 1140
+    },
+    {
+      "epoch": 0.5202960305001121,
+      "grad_norm": 0.18945130705833435,
+      "learning_rate": 9.825637366124458e-06,
+      "loss": 2.3671,
+      "step": 1160
+    },
+    {
+      "epoch": 0.5292666517156313,
+      "grad_norm": 0.22617211937904358,
+      "learning_rate": 9.535176959059171e-06,
+      "loss": 2.423,
+      "step": 1180
+    },
+    {
+      "epoch": 0.5382372729311505,
+      "grad_norm": 0.2126481682062149,
+      "learning_rate": 9.245109111445189e-06,
+      "loss": 2.3887,
+      "step": 1200
+    },
+    {
+      "epoch": 0.5472078941466697,
+      "grad_norm": 0.22861531376838684,
+      "learning_rate": 8.95567879580984e-06,
+      "loss": 2.36,
+      "step": 1220
+    },
+    {
+      "epoch": 0.5561785153621889,
+      "grad_norm": 0.2701588273048401,
+      "learning_rate": 8.667130446262214e-06,
+      "loss": 2.401,
+      "step": 1240
+    },
+    {
+      "epoch": 0.565149136577708,
+      "grad_norm": 0.2689853608608246,
+      "learning_rate": 8.379707752059932e-06,
+      "loss": 2.3753,
+      "step": 1260
+    },
+    {
+      "epoch": 0.5741197577932272,
+      "grad_norm": 0.22886186838150024,
+      "learning_rate": 8.093653451804987e-06,
+      "loss": 2.4807,
+      "step": 1280
+    },
+    {
+      "epoch": 0.5830903790087464,
+      "grad_norm": 0.24667732417583466,
+      "learning_rate": 7.809209128442408e-06,
+      "loss": 2.4269,
+      "step": 1300
+    },
+    {
+      "epoch": 0.5920610002242656,
+      "grad_norm": 0.22338470816612244,
+      "learning_rate": 7.52661500523497e-06,
+      "loss": 2.4133,
+      "step": 1320
+    },
+    {
+      "epoch": 0.6010316214397847,
+      "grad_norm": 0.21941307187080383,
+      "learning_rate": 7.246109742886156e-06,
+      "loss": 2.4606,
+      "step": 1340
+    },
+    {
+      "epoch": 0.6100022426553039,
+      "grad_norm": 0.22378675639629364,
+      "learning_rate": 6.967930237982793e-06,
+      "loss": 2.3498,
+      "step": 1360
+    },
+    {
+      "epoch": 0.6189728638708231,
+      "grad_norm": 0.23019564151763916,
+      "learning_rate": 6.692311422927515e-06,
+      "loss": 2.3927,
+      "step": 1380
+    },
+    {
+      "epoch": 0.6279434850863422,
+      "grad_norm": 0.22530066967010498,
+      "learning_rate": 6.4194860675300695e-06,
+      "loss": 2.4463,
+      "step": 1400
+    },
+    {
+      "epoch": 0.6369141063018614,
+      "grad_norm": 0.2268647849559784,
+      "learning_rate": 6.149684582425013e-06,
+      "loss": 2.4025,
+      "step": 1420
+    },
+    {
+      "epoch": 0.6458847275173806,
+      "grad_norm": 0.2626585066318512,
+      "learning_rate": 5.883134824481786e-06,
+      "loss": 2.3956,
+      "step": 1440
+    },
+    {
+      "epoch": 0.6548553487328997,
+      "grad_norm": 0.24002288281917572,
+      "learning_rate": 5.620061904371565e-06,
+      "loss": 2.3784,
+      "step": 1460
+    },
+    {
+      "epoch": 0.6638259699484189,
+      "grad_norm": 0.2304755002260208,
+      "learning_rate": 5.360687996453348e-06,
+      "loss": 2.4067,
+      "step": 1480
+    },
+    {
+      "epoch": 0.6727965911639381,
+      "grad_norm": 0.22266767919063568,
+      "learning_rate": 5.105232151139895e-06,
+      "loss": 2.4311,
+      "step": 1500
+    },
+    {
+      "epoch": 0.6817672123794573,
+      "grad_norm": 0.25173887610435486,
+      "learning_rate": 4.853910109901901e-06,
+      "loss": 2.3631,
+      "step": 1520
+    },
+    {
+      "epoch": 0.6907378335949764,
+      "grad_norm": 0.2748667299747467,
+      "learning_rate": 4.606934123066739e-06,
+      "loss": 2.3062,
+      "step": 1540
+    },
+    {
+      "epoch": 0.6997084548104956,
+      "grad_norm": 0.3214375972747803,
+      "learning_rate": 4.3645127705655654e-06,
+      "loss": 2.4436,
+      "step": 1560
+    },
+    {
+      "epoch": 0.7086790760260148,
+      "grad_norm": 0.25126707553863525,
+      "learning_rate": 4.126850785780199e-06,
+      "loss": 2.5224,
+      "step": 1580
+    },
+    {
+      "epoch": 0.7176496972415339,
+      "grad_norm": 0.24009720981121063,
+      "learning_rate": 3.8941488826385855e-06,
+      "loss": 2.3984,
+      "step": 1600
+    },
+    {
+      "epoch": 0.7266203184570531,
+      "grad_norm": 0.269382506608963,
+      "learning_rate": 3.6666035861047744e-06,
+      "loss": 2.4344,
+      "step": 1620
+    },
+    {
+      "epoch": 0.7355909396725723,
+      "grad_norm": 0.2938186526298523,
+      "learning_rate": 3.444407066206692e-06,
+      "loss": 2.3371,
+      "step": 1640
+    },
+    {
+      "epoch": 0.7445615608880914,
+      "grad_norm": 0.23332324624061584,
+      "learning_rate": 3.2277469757417403e-06,
+      "loss": 2.3741,
+      "step": 1660
+    },
+    {
+      "epoch": 0.7535321821036107,
+      "grad_norm": 0.24623927474021912,
+      "learning_rate": 3.0168062917974173e-06,
+      "loss": 2.3467,
+      "step": 1680
+    },
+    {
+      "epoch": 0.7625028033191299,
+      "grad_norm": 0.2714971601963043,
+      "learning_rate": 2.8117631612207084e-06,
+      "loss": 2.3712,
+      "step": 1700
+    },
+    {
+      "epoch": 0.7714734245346491,
+      "grad_norm": 0.24978309869766235,
+      "learning_rate": 2.6127907501667726e-06,
+      "loss": 2.4389,
+      "step": 1720
+    },
+    {
+      "epoch": 0.7804440457501682,
+      "grad_norm": 0.25042879581451416,
+      "learning_rate": 2.420057097854046e-06,
+      "loss": 2.3793,
+      "step": 1740
+    },
+    {
+      "epoch": 0.7894146669656874,
+      "grad_norm": 0.2673550248146057,
+      "learning_rate": 2.2337249746491695e-06,
+      "loss": 2.3452,
+      "step": 1760
+    },
+    {
+      "epoch": 0.7983852881812066,
+      "grad_norm": 0.2639400362968445,
+      "learning_rate": 2.0539517446016975e-06,
+      "loss": 2.3364,
+      "step": 1780
+    },
+    {
+      "epoch": 0.8073559093967257,
+      "grad_norm": 0.2685152590274811,
+      "learning_rate": 1.880889232544585e-06,
+      "loss": 2.3915,
+      "step": 1800
+    },
+    {
+      "epoch": 0.8163265306122449,
+      "grad_norm": 0.2522066533565521,
+      "learning_rate": 1.714683595872777e-06,
+      "loss": 2.3616,
+      "step": 1820
+    },
+    {
+      "epoch": 0.8252971518277641,
+      "grad_norm": 0.25205305218696594,
+      "learning_rate": 1.5554752011081332e-06,
+      "loss": 2.3692,
+      "step": 1840
+    },
+    {
+      "epoch": 0.8342677730432833,
+      "grad_norm": 0.30145809054374695,
+      "learning_rate": 1.4033985053549425e-06,
+      "loss": 2.3174,
+      "step": 1860
+    },
+    {
+      "epoch": 0.8432383942588024,
+      "grad_norm": 0.2629565894603729,
+      "learning_rate": 1.2585819427461564e-06,
+      "loss": 2.3526,
+      "step": 1880
+    },
+    {
+      "epoch": 0.8522090154743216,
+      "grad_norm": 0.2707832455635071,
+      "learning_rate": 1.121147815976248e-06,
+      "loss": 2.3042,
+      "step": 1900
+    },
+    {
+      "epoch": 0.8611796366898408,
+      "grad_norm": 0.23665176331996918,
+      "learning_rate": 9.912121930122542e-07,
+      "loss": 2.3199,
+      "step": 1920
+    },
+    {
+      "epoch": 0.8701502579053599,
+      "grad_norm": 0.2816649377346039,
+      "learning_rate": 8.688848090702928e-07,
+      "loss": 2.331,
+      "step": 1940
+    },
+    {
+      "epoch": 0.8791208791208791,
+      "grad_norm": 0.27171188592910767,
+      "learning_rate": 7.542689739403097e-07,
+      "loss": 2.3775,
+      "step": 1960
+    },
+    {
+      "epoch": 0.8880915003363983,
+      "grad_norm": 0.250848650932312,
+      "learning_rate": 6.474614847373051e-07,
+      "loss": 2.3671,
+      "step": 1980
+    },
+    {
+      "epoch": 0.8970621215519174,
+      "grad_norm": 0.2723957598209381,
+      "learning_rate": 5.485525441527651e-07,
+      "loss": 2.2999,
+      "step": 2000
+    },
+    {
+      "epoch": 0.9060327427674366,
+      "grad_norm": 0.2722890079021454,
+      "learning_rate": 4.5762568427529795e-07,
+      "loss": 2.3807,
+      "step": 2020
+    },
+    {
+      "epoch": 0.9150033639829558,
+      "grad_norm": 0.2557665705680847,
+      "learning_rate": 3.747576960448551e-07,
+      "loss": 2.3589,
+      "step": 2040
+    },
+    {
+      "epoch": 0.923973985198475,
+      "grad_norm": 0.3588675856590271,
+      "learning_rate": 3.0001856440005307e-07,
+      "loss": 2.3025,
+      "step": 2060
+    },
+    {
+      "epoch": 0.9329446064139941,
+      "grad_norm": 0.2574796676635742,
+      "learning_rate": 2.3347140917344579e-07,
+      "loss": 2.4445,
+      "step": 2080
+    },
+    {
+      "epoch": 0.9419152276295133,
+      "grad_norm": 0.2603015899658203,
+      "learning_rate": 1.7517243178458486e-07,
+      "loss": 2.3884,
+      "step": 2100
+    },
+    {
+      "epoch": 0.9508858488450325,
+      "grad_norm": 0.2759827673435211,
+      "learning_rate": 1.2517086777594112e-07,
+      "loss": 2.3706,
+      "step": 2120
+    },
+    {
+      "epoch": 0.9598564700605517,
+      "grad_norm": 0.2819114327430725,
+      "learning_rate": 8.35089452317639e-08,
+      "loss": 2.2965,
+      "step": 2140
+    },
+    {
+      "epoch": 0.9688270912760709,
+      "grad_norm": 0.2688797414302826,
+      "learning_rate": 5.022184911495864e-08,
+      "loss": 2.3142,
+      "step": 2160
+    },
+    {
+      "epoch": 0.9777977124915901,
+      "grad_norm": 0.2871028780937195,
+      "learning_rate": 2.5337691552156372e-08,
+      "loss": 2.3665,
+      "step": 2180
+    },
+    {
+      "epoch": 0.9867683337071093,
+      "grad_norm": 0.25957873463630676,
+      "learning_rate": 8.877488092022823e-09,
+      "loss": 2.3602,
+      "step": 2200
+    },
+    {
+      "epoch": 0.9957389549226284,
+      "grad_norm": 0.2717770040035248,
+      "learning_rate": 8.551399568945684e-10,
+      "loss": 2.3519,
+      "step": 2220
+    },
+    {
+      "epoch": 0.999775734469612,
+      "step": 2229,
+      "total_flos": 3.1724140856564777e+18,
+      "train_loss": 2.4421788879230237,
+      "train_runtime": 8917.1095,
+      "train_samples_per_second": 15.998,
+      "train_steps_per_second": 0.25
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 2229,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 5000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.1724140856564777e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a10f4490c03e79d1b69c6be2dcbaa1baf82d77b75e6ee314eabb97033b494c5
+size 12216

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff