William committed
Commit bf8969c · 1 Parent(s): c1daed0
opi_full_tuned_model
Browse files
- config.json +32 -0
- generation_config.json +7 -0
- pytorch_model-1-of-32.bin +3 -0
- pytorch_model-10-of-32.bin +3 -0
- pytorch_model-11-of-32.bin +3 -0
- pytorch_model-12-of-32.bin +3 -0
- pytorch_model-13-of-32.bin +3 -0
- pytorch_model-14-of-32.bin +3 -0
- pytorch_model-15-of-32.bin +3 -0
- pytorch_model-16-of-32.bin +3 -0
- pytorch_model-17-of-32.bin +3 -0
- pytorch_model-18-of-32.bin +3 -0
- pytorch_model-19-of-32.bin +3 -0
- pytorch_model-2-of-32.bin +3 -0
- pytorch_model-20-of-32.bin +3 -0
- pytorch_model-21-of-32.bin +3 -0
- pytorch_model-22-of-32.bin +3 -0
- pytorch_model-23-of-32.bin +3 -0
- pytorch_model-24-of-32.bin +3 -0
- pytorch_model-25-of-32.bin +3 -0
- pytorch_model-26-of-32.bin +3 -0
- pytorch_model-27-of-32.bin +3 -0
- pytorch_model-28-of-32.bin +3 -0
- pytorch_model-29-of-32.bin +3 -0
- pytorch_model-3-of-32.bin +3 -0
- pytorch_model-30-of-32.bin +3 -0
- pytorch_model-31-of-32.bin +3 -0
- pytorch_model-32-of-32.bin +3 -0
- pytorch_model-4-of-32.bin +3 -0
- pytorch_model-5-of-32.bin +3 -0
- pytorch_model-6-of-32.bin +3 -0
- pytorch_model-7-of-32.bin +3 -0
- pytorch_model-8-of-32.bin +3 -0
- pytorch_model-9-of-32.bin +3 -0
- pytorch_model.bin.index.json +1 -0
- special_tokens_map.json +6 -0
- tokenizer.json +0 -0
- tokenizer_config.json +6 -0
- trainer_state.json +127 -0
- training_args.bin +3 -0
config.json
ADDED
@@ -0,0 +1,32 @@
{
  "_name_or_path": "/share/project/xiaohongwang/LLM_checkpoints/galai/galactica-6.7b",
  "_remove_final_layer_norm": false,
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "architectures": [
    "OPTForCausalLM"
  ],
  "attention_dropout": 0.1,
  "bos_token_id": 0,
  "do_layer_norm_before": true,
  "dropout": 0.1,
  "enable_bias": true,
  "eos_token_id": 2,
  "ffn_dim": 16384,
  "hidden_size": 4096,
  "init_std": 0.02,
  "layer_norm_elementwise_affine": true,
  "layerdrop": 0.0,
  "learned_embeddings": true,
  "max_position_embeddings": 2048,
  "model_type": "opt",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "pad_token_id": 1,
  "scale_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.28.1",
  "use_cache": true,
  "vocab_size": 50001,
  "word_embed_proj_dim": 4096
}
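The config above describes a 32-layer OPT-architecture decoder (a Galactica-6.7B fine-tune) stored in bfloat16 with a 50001-token vocabulary. A minimal loading sketch follows; the local directory name `./opi_full_tuned_model` is a hypothetical path for illustration, not something recorded in this commit:

```python
# Minimal sketch; "./opi_full_tuned_model" is an assumed local copy of this commit's files.
import torch
from transformers import AutoConfig, AutoModelForCausalLM

checkpoint_dir = "./opi_full_tuned_model"

config = AutoConfig.from_pretrained(checkpoint_dir)   # parses config.json above
print(config.model_type, config.num_hidden_layers)    # "opt", 32

# from_pretrained resolves the 32 sharded .bin files through
# pytorch_model.bin.index.json and keeps the weights in bfloat16.
model = AutoModelForCausalLM.from_pretrained(checkpoint_dir, torch_dtype=torch.bfloat16)
```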
generation_config.json
ADDED
@@ -0,0 +1,7 @@
{
  "_from_model_config": true,
  "bos_token_id": 0,
  "eos_token_id": 2,
  "pad_token_id": 1,
  "transformers_version": "4.28.1"
}
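generation_config.json only pins the default special-token ids used at generation time. A small sketch of how these defaults can be read back, assuming the same hypothetical checkpoint directory:

```python
# Minimal sketch; same assumed local checkpoint directory as above.
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("./opi_full_tuned_model")  # parses generation_config.json
print(gen_cfg.bos_token_id, gen_cfg.eos_token_id, gen_cfg.pad_token_id)  # 0 2 1

# model.generate() picks these defaults up automatically when the file is present;
# passing generation_config=gen_cfg explicitly is equivalent.
```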
pytorch_model-1-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:33851cf19ec9838c81ab69cea078b938134a369c835b7254044adf13392f0bd2
size 1658361449
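Each pytorch_model-*-of-32.bin entry in this commit is a Git LFS pointer (a version line, a sha256 oid, and a byte size) rather than the tensor data itself. The sketch below parses such a pointer and checks a locally downloaded shard against it; the file path is an assumption about local layout:

```python
# Minimal sketch: verify a downloaded shard against its 3-line Git LFS pointer.
import hashlib

def parse_lfs_pointer(pointer_text: str):
    """Return (sha256_hex, size_in_bytes) from a Git LFS pointer file."""
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    return fields["oid"].split(":", 1)[1], int(fields["size"])

def verify_shard(shard_path: str, pointer_text: str) -> bool:
    expected_oid, expected_size = parse_lfs_pointer(pointer_text)
    digest, size = hashlib.sha256(), 0
    with open(shard_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
            digest.update(chunk)
            size += len(chunk)
    return size == expected_size and digest.hexdigest() == expected_oid

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:33851cf19ec9838c81ab69cea078b938134a369c835b7254044adf13392f0bd2
size 1658361449"""
# verify_shard("pytorch_model-1-of-32.bin", pointer)  # True if the download is intact
```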
pytorch_model-10-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:21f717222b7cda430c1314ab03a4faaff71024053bea284c20fb50bf3e31fead
size 805523957
pytorch_model-11-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9528e113dc74ed0a25d4ca1c74f9ac85ec93e3ef50f9aacc205504482ddd64f6
size 805523957
pytorch_model-12-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:730cee9d38d7df6fe2f6a56e26daf5ca81d573faa41e5046280e1ff7c5169fba
size 805523957
pytorch_model-13-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:53f0e9cc2fb124a4bc186c6f49d2ad27245423ecd10e3e7a566e78fb1d3cd377
size 805523957
pytorch_model-14-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5bc4add41baef7f74caa4f07fd3ace481ad8ba706914e114983c3eb9e6845a12
size 805523957
pytorch_model-15-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:70d8328d3a735b1cc484161814e969e97a5e8ff786512cf03b04962667137708
size 805523957
pytorch_model-16-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e81cdcfb2bff10d49eb00a449b652578686cbb36f01eca74eb49c537acbb627
size 805523957
pytorch_model-17-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:18aa842f6f7cb8a491a1c9c46136fdfd5aced6103cbfe42ad22b1e5bf32e3ba3
size 805523957
pytorch_model-18-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:257458025f14cc240435ceb4eaec213ffa3ea68c0bcd5147d9df4aff7aca1d97
size 805523957
pytorch_model-19-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:06f2c30b6d7e69811409ee73fcfb65938843b68f7943ee9178dcf7758928b58f
size 805523957
pytorch_model-2-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b5659e53f7b0c9455892d4d86d55631848d72909da303938f51eccbc932c87e4
size 805523957
pytorch_model-20-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e470113e2fc15266b34d1543bd71fcc73bf4dcc8ba7fb76b284b8114cba0de4b
size 805523957
pytorch_model-21-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a07c8fb9b9a2477215fbfecade440cbbb09d7f2ee11cf837d6730233e7ab35d9
size 805523957
pytorch_model-22-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:086c97e31fce8a97268a04f3cc9ab3d58ee954a6fdd53e16d6df507bfd9a833c
size 805523957
pytorch_model-23-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3a83cd31c4c13046c83657e05df2f5209d8d4220060ac2e21ded5dd651f5b7e0
size 805523957
pytorch_model-24-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8520e04e8e46e893928be3787fd723e7e954ebf76a3901e07168c1684186e458
size 805523957
pytorch_model-25-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3b4a027bf43608f1db0ab7865db1a6da4ec7791ff763d2898693d540b1d5034d
size 805523957
pytorch_model-26-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:958485a57eb8da5f7ab6fc5088ac082fcaec51ce04231008768fd178f740fd8a
size 805523957
pytorch_model-27-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c3b8c90bcc069a0b4a9443026c291c8d39ac455cae370c8fcbfdafdb025281f0
size 805523957
pytorch_model-28-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e81e1d3234156366bb8a3281283df8c2dc1545c2d7df48148104e692eeddaf23
size 805523957
pytorch_model-29-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:83c9303e57b75c11e6d294d29b9b88f0a050f91dcaddceb6aedde39083bec47b
size 805523957
pytorch_model-3-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:64e800582d0d01e32a9cc7b38aa393f992f693d1f92af86c486c0d6fde118d75
size 805523957
pytorch_model-30-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c94ac13a208adbecd84f03060a90fa37f6d8da99575e61f1b311f1681bf31f62
size 805523957
pytorch_model-31-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f21492733523958e62883b9d591ba94c190b2bfcd60319e60e066598153040a1
size 805523957
pytorch_model-32-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5d0ede5fbb5b7d971d1d3bb02eaf5cbc998c8e2d0a16c0657c3366f493d207fc
size 1624740658
pytorch_model-4-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:270822c412e74256763a7406b88f9c51b66759e062fcfeb44703d39d30783c71
size 805523957
pytorch_model-5-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6e05e99ab17b48f57d563368043850c25de9c2f07b5bae195b2ceb8bbd113a99
size 805523957
pytorch_model-6-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:64007b17970cd393fddbb50e580dd5a531f17ef79b647857d77cce6142a8f063
size 805523957
pytorch_model-7-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1b96ca35655a151dd4003e286b413dddc4212be8b676bd2f74d0cbfe61812fde
size 805523957
pytorch_model-8-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1b4b562f760b7441d90dd9b5e699ac5153d6ceaf0773d7f6d937bceb022a2895
size 805523957
pytorch_model-9-of-32.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c4b1189685df26dc60d1076441d96169348ca8a8084dc12770ef1b8fe38eb168
size 805523957
pytorch_model.bin.index.json
ADDED
@@ -0,0 +1 @@
{"metadata": {"total_size": 13724336128}, "weight_map": {"model.decoder.layers.0.self_attn.q_proj.weight": "pytorch_model-1-of-32.bin", "model.decoder.layers.0.self_attn.q_proj.bias": "pytorch_model-1-of-32.bin", "model.decoder.layers.0.self_attn.k_proj.weight": "pytorch_model-1-of-32.bin", "model.decoder.layers.0.self_attn.k_proj.bias": "pytorch_model-1-of-32.bin", "model.decoder.layers.0.self_attn.v_proj.weight": "pytorch_model-1-of-32.bin", "model.decoder.layers.0.self_attn.v_proj.bias": "pytorch_model-1-of-32.bin", "model.decoder.layers.0.self_attn.out_proj.weight": "pytorch_model-1-of-32.bin", "model.decoder.layers.0.self_attn.out_proj.bias": "pytorch_model-1-of-32.bin", "model.decoder.layers.0.self_attn_layer_norm.weight": "pytorch_model-1-of-32.bin", "model.decoder.layers.0.self_attn_layer_norm.bias": "pytorch_model-1-of-32.bin", "model.decoder.layers.0.fc1.weight": "pytorch_model-1-of-32.bin", "model.decoder.layers.0.fc1.bias": "pytorch_model-1-of-32.bin", "model.decoder.layers.0.fc2.weight": "pytorch_model-1-of-32.bin", "model.decoder.layers.0.fc2.bias": "pytorch_model-1-of-32.bin", "model.decoder.layers.0.final_layer_norm.weight": "pytorch_model-1-of-32.bin", "model.decoder.layers.0.final_layer_norm.bias": "pytorch_model-1-of-32.bin", "model.decoder.embed_tokens.weight": "pytorch_model-1-of-32.bin", "model.decoder.embed_positions.weight": "pytorch_model-1-of-32.bin", "model.decoder.final_layer_norm.weight": "pytorch_model-1-of-32.bin", "model.decoder.final_layer_norm.bias": "pytorch_model-1-of-32.bin", "model.decoder.layers.1.self_attn.q_proj.weight": "pytorch_model-2-of-32.bin", "model.decoder.layers.1.self_attn.q_proj.bias": "pytorch_model-2-of-32.bin", "model.decoder.layers.1.self_attn.k_proj.weight": "pytorch_model-2-of-32.bin", "model.decoder.layers.1.self_attn.k_proj.bias": "pytorch_model-2-of-32.bin", "model.decoder.layers.1.self_attn.v_proj.weight": "pytorch_model-2-of-32.bin", "model.decoder.layers.1.self_attn.v_proj.bias": "pytorch_model-2-of-32.bin", "model.decoder.layers.1.self_attn.out_proj.weight": "pytorch_model-2-of-32.bin", "model.decoder.layers.1.self_attn.out_proj.bias": "pytorch_model-2-of-32.bin", "model.decoder.layers.1.self_attn_layer_norm.weight": "pytorch_model-2-of-32.bin", "model.decoder.layers.1.self_attn_layer_norm.bias": "pytorch_model-2-of-32.bin", "model.decoder.layers.1.fc1.weight": "pytorch_model-2-of-32.bin", "model.decoder.layers.1.fc1.bias": "pytorch_model-2-of-32.bin", "model.decoder.layers.1.fc2.weight": "pytorch_model-2-of-32.bin", "model.decoder.layers.1.fc2.bias": "pytorch_model-2-of-32.bin", "model.decoder.layers.1.final_layer_norm.weight": "pytorch_model-2-of-32.bin", "model.decoder.layers.1.final_layer_norm.bias": "pytorch_model-2-of-32.bin", "model.decoder.layers.2.self_attn.q_proj.weight": "pytorch_model-3-of-32.bin", "model.decoder.layers.2.self_attn.q_proj.bias": "pytorch_model-3-of-32.bin", "model.decoder.layers.2.self_attn.k_proj.weight": "pytorch_model-3-of-32.bin", "model.decoder.layers.2.self_attn.k_proj.bias": "pytorch_model-3-of-32.bin", "model.decoder.layers.2.self_attn.v_proj.weight": "pytorch_model-3-of-32.bin", "model.decoder.layers.2.self_attn.v_proj.bias": "pytorch_model-3-of-32.bin", "model.decoder.layers.2.self_attn.out_proj.weight": "pytorch_model-3-of-32.bin", "model.decoder.layers.2.self_attn.out_proj.bias": "pytorch_model-3-of-32.bin", "model.decoder.layers.2.self_attn_layer_norm.weight": "pytorch_model-3-of-32.bin", "model.decoder.layers.2.self_attn_layer_norm.bias": "pytorch_model-3-of-32.bin", 
"model.decoder.layers.2.fc1.weight": "pytorch_model-3-of-32.bin", "model.decoder.layers.2.fc1.bias": "pytorch_model-3-of-32.bin", "model.decoder.layers.2.fc2.weight": "pytorch_model-3-of-32.bin", "model.decoder.layers.2.fc2.bias": "pytorch_model-3-of-32.bin", "model.decoder.layers.2.final_layer_norm.weight": "pytorch_model-3-of-32.bin", "model.decoder.layers.2.final_layer_norm.bias": "pytorch_model-3-of-32.bin", "model.decoder.layers.3.self_attn.q_proj.weight": "pytorch_model-4-of-32.bin", "model.decoder.layers.3.self_attn.q_proj.bias": "pytorch_model-4-of-32.bin", "model.decoder.layers.3.self_attn.k_proj.weight": "pytorch_model-4-of-32.bin", "model.decoder.layers.3.self_attn.k_proj.bias": "pytorch_model-4-of-32.bin", "model.decoder.layers.3.self_attn.v_proj.weight": "pytorch_model-4-of-32.bin", "model.decoder.layers.3.self_attn.v_proj.bias": "pytorch_model-4-of-32.bin", "model.decoder.layers.3.self_attn.out_proj.weight": "pytorch_model-4-of-32.bin", "model.decoder.layers.3.self_attn.out_proj.bias": "pytorch_model-4-of-32.bin", "model.decoder.layers.3.self_attn_layer_norm.weight": "pytorch_model-4-of-32.bin", "model.decoder.layers.3.self_attn_layer_norm.bias": "pytorch_model-4-of-32.bin", "model.decoder.layers.3.fc1.weight": "pytorch_model-4-of-32.bin", "model.decoder.layers.3.fc1.bias": "pytorch_model-4-of-32.bin", "model.decoder.layers.3.fc2.weight": "pytorch_model-4-of-32.bin", "model.decoder.layers.3.fc2.bias": "pytorch_model-4-of-32.bin", "model.decoder.layers.3.final_layer_norm.weight": "pytorch_model-4-of-32.bin", "model.decoder.layers.3.final_layer_norm.bias": "pytorch_model-4-of-32.bin", "model.decoder.layers.4.self_attn.q_proj.weight": "pytorch_model-5-of-32.bin", "model.decoder.layers.4.self_attn.q_proj.bias": "pytorch_model-5-of-32.bin", "model.decoder.layers.4.self_attn.k_proj.weight": "pytorch_model-5-of-32.bin", "model.decoder.layers.4.self_attn.k_proj.bias": "pytorch_model-5-of-32.bin", "model.decoder.layers.4.self_attn.v_proj.weight": "pytorch_model-5-of-32.bin", "model.decoder.layers.4.self_attn.v_proj.bias": "pytorch_model-5-of-32.bin", "model.decoder.layers.4.self_attn.out_proj.weight": "pytorch_model-5-of-32.bin", "model.decoder.layers.4.self_attn.out_proj.bias": "pytorch_model-5-of-32.bin", "model.decoder.layers.4.self_attn_layer_norm.weight": "pytorch_model-5-of-32.bin", "model.decoder.layers.4.self_attn_layer_norm.bias": "pytorch_model-5-of-32.bin", "model.decoder.layers.4.fc1.weight": "pytorch_model-5-of-32.bin", "model.decoder.layers.4.fc1.bias": "pytorch_model-5-of-32.bin", "model.decoder.layers.4.fc2.weight": "pytorch_model-5-of-32.bin", "model.decoder.layers.4.fc2.bias": "pytorch_model-5-of-32.bin", "model.decoder.layers.4.final_layer_norm.weight": "pytorch_model-5-of-32.bin", "model.decoder.layers.4.final_layer_norm.bias": "pytorch_model-5-of-32.bin", "model.decoder.layers.5.self_attn.q_proj.weight": "pytorch_model-6-of-32.bin", "model.decoder.layers.5.self_attn.q_proj.bias": "pytorch_model-6-of-32.bin", "model.decoder.layers.5.self_attn.k_proj.weight": "pytorch_model-6-of-32.bin", "model.decoder.layers.5.self_attn.k_proj.bias": "pytorch_model-6-of-32.bin", "model.decoder.layers.5.self_attn.v_proj.weight": "pytorch_model-6-of-32.bin", "model.decoder.layers.5.self_attn.v_proj.bias": "pytorch_model-6-of-32.bin", "model.decoder.layers.5.self_attn.out_proj.weight": "pytorch_model-6-of-32.bin", "model.decoder.layers.5.self_attn.out_proj.bias": "pytorch_model-6-of-32.bin", "model.decoder.layers.5.self_attn_layer_norm.weight": "pytorch_model-6-of-32.bin", 
"model.decoder.layers.5.self_attn_layer_norm.bias": "pytorch_model-6-of-32.bin", "model.decoder.layers.5.fc1.weight": "pytorch_model-6-of-32.bin", "model.decoder.layers.5.fc1.bias": "pytorch_model-6-of-32.bin", "model.decoder.layers.5.fc2.weight": "pytorch_model-6-of-32.bin", "model.decoder.layers.5.fc2.bias": "pytorch_model-6-of-32.bin", "model.decoder.layers.5.final_layer_norm.weight": "pytorch_model-6-of-32.bin", "model.decoder.layers.5.final_layer_norm.bias": "pytorch_model-6-of-32.bin", "model.decoder.layers.6.self_attn.q_proj.weight": "pytorch_model-7-of-32.bin", "model.decoder.layers.6.self_attn.q_proj.bias": "pytorch_model-7-of-32.bin", "model.decoder.layers.6.self_attn.k_proj.weight": "pytorch_model-7-of-32.bin", "model.decoder.layers.6.self_attn.k_proj.bias": "pytorch_model-7-of-32.bin", "model.decoder.layers.6.self_attn.v_proj.weight": "pytorch_model-7-of-32.bin", "model.decoder.layers.6.self_attn.v_proj.bias": "pytorch_model-7-of-32.bin", "model.decoder.layers.6.self_attn.out_proj.weight": "pytorch_model-7-of-32.bin", "model.decoder.layers.6.self_attn.out_proj.bias": "pytorch_model-7-of-32.bin", "model.decoder.layers.6.self_attn_layer_norm.weight": "pytorch_model-7-of-32.bin", "model.decoder.layers.6.self_attn_layer_norm.bias": "pytorch_model-7-of-32.bin", "model.decoder.layers.6.fc1.weight": "pytorch_model-7-of-32.bin", "model.decoder.layers.6.fc1.bias": "pytorch_model-7-of-32.bin", "model.decoder.layers.6.fc2.weight": "pytorch_model-7-of-32.bin", "model.decoder.layers.6.fc2.bias": "pytorch_model-7-of-32.bin", "model.decoder.layers.6.final_layer_norm.weight": "pytorch_model-7-of-32.bin", "model.decoder.layers.6.final_layer_norm.bias": "pytorch_model-7-of-32.bin", "model.decoder.layers.7.self_attn.q_proj.weight": "pytorch_model-8-of-32.bin", "model.decoder.layers.7.self_attn.q_proj.bias": "pytorch_model-8-of-32.bin", "model.decoder.layers.7.self_attn.k_proj.weight": "pytorch_model-8-of-32.bin", "model.decoder.layers.7.self_attn.k_proj.bias": "pytorch_model-8-of-32.bin", "model.decoder.layers.7.self_attn.v_proj.weight": "pytorch_model-8-of-32.bin", "model.decoder.layers.7.self_attn.v_proj.bias": "pytorch_model-8-of-32.bin", "model.decoder.layers.7.self_attn.out_proj.weight": "pytorch_model-8-of-32.bin", "model.decoder.layers.7.self_attn.out_proj.bias": "pytorch_model-8-of-32.bin", "model.decoder.layers.7.self_attn_layer_norm.weight": "pytorch_model-8-of-32.bin", "model.decoder.layers.7.self_attn_layer_norm.bias": "pytorch_model-8-of-32.bin", "model.decoder.layers.7.fc1.weight": "pytorch_model-8-of-32.bin", "model.decoder.layers.7.fc1.bias": "pytorch_model-8-of-32.bin", "model.decoder.layers.7.fc2.weight": "pytorch_model-8-of-32.bin", "model.decoder.layers.7.fc2.bias": "pytorch_model-8-of-32.bin", "model.decoder.layers.7.final_layer_norm.weight": "pytorch_model-8-of-32.bin", "model.decoder.layers.7.final_layer_norm.bias": "pytorch_model-8-of-32.bin", "model.decoder.layers.8.self_attn.q_proj.weight": "pytorch_model-9-of-32.bin", "model.decoder.layers.8.self_attn.q_proj.bias": "pytorch_model-9-of-32.bin", "model.decoder.layers.8.self_attn.k_proj.weight": "pytorch_model-9-of-32.bin", "model.decoder.layers.8.self_attn.k_proj.bias": "pytorch_model-9-of-32.bin", "model.decoder.layers.8.self_attn.v_proj.weight": "pytorch_model-9-of-32.bin", "model.decoder.layers.8.self_attn.v_proj.bias": "pytorch_model-9-of-32.bin", "model.decoder.layers.8.self_attn.out_proj.weight": "pytorch_model-9-of-32.bin", "model.decoder.layers.8.self_attn.out_proj.bias": "pytorch_model-9-of-32.bin", 
"model.decoder.layers.8.self_attn_layer_norm.weight": "pytorch_model-9-of-32.bin", "model.decoder.layers.8.self_attn_layer_norm.bias": "pytorch_model-9-of-32.bin", "model.decoder.layers.8.fc1.weight": "pytorch_model-9-of-32.bin", "model.decoder.layers.8.fc1.bias": "pytorch_model-9-of-32.bin", "model.decoder.layers.8.fc2.weight": "pytorch_model-9-of-32.bin", "model.decoder.layers.8.fc2.bias": "pytorch_model-9-of-32.bin", "model.decoder.layers.8.final_layer_norm.weight": "pytorch_model-9-of-32.bin", "model.decoder.layers.8.final_layer_norm.bias": "pytorch_model-9-of-32.bin", "model.decoder.layers.9.self_attn.q_proj.weight": "pytorch_model-10-of-32.bin", "model.decoder.layers.9.self_attn.q_proj.bias": "pytorch_model-10-of-32.bin", "model.decoder.layers.9.self_attn.k_proj.weight": "pytorch_model-10-of-32.bin", "model.decoder.layers.9.self_attn.k_proj.bias": "pytorch_model-10-of-32.bin", "model.decoder.layers.9.self_attn.v_proj.weight": "pytorch_model-10-of-32.bin", "model.decoder.layers.9.self_attn.v_proj.bias": "pytorch_model-10-of-32.bin", "model.decoder.layers.9.self_attn.out_proj.weight": "pytorch_model-10-of-32.bin", "model.decoder.layers.9.self_attn.out_proj.bias": "pytorch_model-10-of-32.bin", "model.decoder.layers.9.self_attn_layer_norm.weight": "pytorch_model-10-of-32.bin", "model.decoder.layers.9.self_attn_layer_norm.bias": "pytorch_model-10-of-32.bin", "model.decoder.layers.9.fc1.weight": "pytorch_model-10-of-32.bin", "model.decoder.layers.9.fc1.bias": "pytorch_model-10-of-32.bin", "model.decoder.layers.9.fc2.weight": "pytorch_model-10-of-32.bin", "model.decoder.layers.9.fc2.bias": "pytorch_model-10-of-32.bin", "model.decoder.layers.9.final_layer_norm.weight": "pytorch_model-10-of-32.bin", "model.decoder.layers.9.final_layer_norm.bias": "pytorch_model-10-of-32.bin", "model.decoder.layers.10.self_attn.q_proj.weight": "pytorch_model-11-of-32.bin", "model.decoder.layers.10.self_attn.q_proj.bias": "pytorch_model-11-of-32.bin", "model.decoder.layers.10.self_attn.k_proj.weight": "pytorch_model-11-of-32.bin", "model.decoder.layers.10.self_attn.k_proj.bias": "pytorch_model-11-of-32.bin", "model.decoder.layers.10.self_attn.v_proj.weight": "pytorch_model-11-of-32.bin", "model.decoder.layers.10.self_attn.v_proj.bias": "pytorch_model-11-of-32.bin", "model.decoder.layers.10.self_attn.out_proj.weight": "pytorch_model-11-of-32.bin", "model.decoder.layers.10.self_attn.out_proj.bias": "pytorch_model-11-of-32.bin", "model.decoder.layers.10.self_attn_layer_norm.weight": "pytorch_model-11-of-32.bin", "model.decoder.layers.10.self_attn_layer_norm.bias": "pytorch_model-11-of-32.bin", "model.decoder.layers.10.fc1.weight": "pytorch_model-11-of-32.bin", "model.decoder.layers.10.fc1.bias": "pytorch_model-11-of-32.bin", "model.decoder.layers.10.fc2.weight": "pytorch_model-11-of-32.bin", "model.decoder.layers.10.fc2.bias": "pytorch_model-11-of-32.bin", "model.decoder.layers.10.final_layer_norm.weight": "pytorch_model-11-of-32.bin", "model.decoder.layers.10.final_layer_norm.bias": "pytorch_model-11-of-32.bin", "model.decoder.layers.11.self_attn.q_proj.weight": "pytorch_model-12-of-32.bin", "model.decoder.layers.11.self_attn.q_proj.bias": "pytorch_model-12-of-32.bin", "model.decoder.layers.11.self_attn.k_proj.weight": "pytorch_model-12-of-32.bin", "model.decoder.layers.11.self_attn.k_proj.bias": "pytorch_model-12-of-32.bin", "model.decoder.layers.11.self_attn.v_proj.weight": "pytorch_model-12-of-32.bin", "model.decoder.layers.11.self_attn.v_proj.bias": "pytorch_model-12-of-32.bin", 
"model.decoder.layers.11.self_attn.out_proj.weight": "pytorch_model-12-of-32.bin", "model.decoder.layers.11.self_attn.out_proj.bias": "pytorch_model-12-of-32.bin", "model.decoder.layers.11.self_attn_layer_norm.weight": "pytorch_model-12-of-32.bin", "model.decoder.layers.11.self_attn_layer_norm.bias": "pytorch_model-12-of-32.bin", "model.decoder.layers.11.fc1.weight": "pytorch_model-12-of-32.bin", "model.decoder.layers.11.fc1.bias": "pytorch_model-12-of-32.bin", "model.decoder.layers.11.fc2.weight": "pytorch_model-12-of-32.bin", "model.decoder.layers.11.fc2.bias": "pytorch_model-12-of-32.bin", "model.decoder.layers.11.final_layer_norm.weight": "pytorch_model-12-of-32.bin", "model.decoder.layers.11.final_layer_norm.bias": "pytorch_model-12-of-32.bin", "model.decoder.layers.12.self_attn.q_proj.weight": "pytorch_model-13-of-32.bin", "model.decoder.layers.12.self_attn.q_proj.bias": "pytorch_model-13-of-32.bin", "model.decoder.layers.12.self_attn.k_proj.weight": "pytorch_model-13-of-32.bin", "model.decoder.layers.12.self_attn.k_proj.bias": "pytorch_model-13-of-32.bin", "model.decoder.layers.12.self_attn.v_proj.weight": "pytorch_model-13-of-32.bin", "model.decoder.layers.12.self_attn.v_proj.bias": "pytorch_model-13-of-32.bin", "model.decoder.layers.12.self_attn.out_proj.weight": "pytorch_model-13-of-32.bin", "model.decoder.layers.12.self_attn.out_proj.bias": "pytorch_model-13-of-32.bin", "model.decoder.layers.12.self_attn_layer_norm.weight": "pytorch_model-13-of-32.bin", "model.decoder.layers.12.self_attn_layer_norm.bias": "pytorch_model-13-of-32.bin", "model.decoder.layers.12.fc1.weight": "pytorch_model-13-of-32.bin", "model.decoder.layers.12.fc1.bias": "pytorch_model-13-of-32.bin", "model.decoder.layers.12.fc2.weight": "pytorch_model-13-of-32.bin", "model.decoder.layers.12.fc2.bias": "pytorch_model-13-of-32.bin", "model.decoder.layers.12.final_layer_norm.weight": "pytorch_model-13-of-32.bin", "model.decoder.layers.12.final_layer_norm.bias": "pytorch_model-13-of-32.bin", "model.decoder.layers.13.self_attn.q_proj.weight": "pytorch_model-14-of-32.bin", "model.decoder.layers.13.self_attn.q_proj.bias": "pytorch_model-14-of-32.bin", "model.decoder.layers.13.self_attn.k_proj.weight": "pytorch_model-14-of-32.bin", "model.decoder.layers.13.self_attn.k_proj.bias": "pytorch_model-14-of-32.bin", "model.decoder.layers.13.self_attn.v_proj.weight": "pytorch_model-14-of-32.bin", "model.decoder.layers.13.self_attn.v_proj.bias": "pytorch_model-14-of-32.bin", "model.decoder.layers.13.self_attn.out_proj.weight": "pytorch_model-14-of-32.bin", "model.decoder.layers.13.self_attn.out_proj.bias": "pytorch_model-14-of-32.bin", "model.decoder.layers.13.self_attn_layer_norm.weight": "pytorch_model-14-of-32.bin", "model.decoder.layers.13.self_attn_layer_norm.bias": "pytorch_model-14-of-32.bin", "model.decoder.layers.13.fc1.weight": "pytorch_model-14-of-32.bin", "model.decoder.layers.13.fc1.bias": "pytorch_model-14-of-32.bin", "model.decoder.layers.13.fc2.weight": "pytorch_model-14-of-32.bin", "model.decoder.layers.13.fc2.bias": "pytorch_model-14-of-32.bin", "model.decoder.layers.13.final_layer_norm.weight": "pytorch_model-14-of-32.bin", "model.decoder.layers.13.final_layer_norm.bias": "pytorch_model-14-of-32.bin", "model.decoder.layers.14.self_attn.q_proj.weight": "pytorch_model-15-of-32.bin", "model.decoder.layers.14.self_attn.q_proj.bias": "pytorch_model-15-of-32.bin", "model.decoder.layers.14.self_attn.k_proj.weight": "pytorch_model-15-of-32.bin", "model.decoder.layers.14.self_attn.k_proj.bias": 
"pytorch_model-15-of-32.bin", "model.decoder.layers.14.self_attn.v_proj.weight": "pytorch_model-15-of-32.bin", "model.decoder.layers.14.self_attn.v_proj.bias": "pytorch_model-15-of-32.bin", "model.decoder.layers.14.self_attn.out_proj.weight": "pytorch_model-15-of-32.bin", "model.decoder.layers.14.self_attn.out_proj.bias": "pytorch_model-15-of-32.bin", "model.decoder.layers.14.self_attn_layer_norm.weight": "pytorch_model-15-of-32.bin", "model.decoder.layers.14.self_attn_layer_norm.bias": "pytorch_model-15-of-32.bin", "model.decoder.layers.14.fc1.weight": "pytorch_model-15-of-32.bin", "model.decoder.layers.14.fc1.bias": "pytorch_model-15-of-32.bin", "model.decoder.layers.14.fc2.weight": "pytorch_model-15-of-32.bin", "model.decoder.layers.14.fc2.bias": "pytorch_model-15-of-32.bin", "model.decoder.layers.14.final_layer_norm.weight": "pytorch_model-15-of-32.bin", "model.decoder.layers.14.final_layer_norm.bias": "pytorch_model-15-of-32.bin", "model.decoder.layers.15.self_attn.q_proj.weight": "pytorch_model-16-of-32.bin", "model.decoder.layers.15.self_attn.q_proj.bias": "pytorch_model-16-of-32.bin", "model.decoder.layers.15.self_attn.k_proj.weight": "pytorch_model-16-of-32.bin", "model.decoder.layers.15.self_attn.k_proj.bias": "pytorch_model-16-of-32.bin", "model.decoder.layers.15.self_attn.v_proj.weight": "pytorch_model-16-of-32.bin", "model.decoder.layers.15.self_attn.v_proj.bias": "pytorch_model-16-of-32.bin", "model.decoder.layers.15.self_attn.out_proj.weight": "pytorch_model-16-of-32.bin", "model.decoder.layers.15.self_attn.out_proj.bias": "pytorch_model-16-of-32.bin", "model.decoder.layers.15.self_attn_layer_norm.weight": "pytorch_model-16-of-32.bin", "model.decoder.layers.15.self_attn_layer_norm.bias": "pytorch_model-16-of-32.bin", "model.decoder.layers.15.fc1.weight": "pytorch_model-16-of-32.bin", "model.decoder.layers.15.fc1.bias": "pytorch_model-16-of-32.bin", "model.decoder.layers.15.fc2.weight": "pytorch_model-16-of-32.bin", "model.decoder.layers.15.fc2.bias": "pytorch_model-16-of-32.bin", "model.decoder.layers.15.final_layer_norm.weight": "pytorch_model-16-of-32.bin", "model.decoder.layers.15.final_layer_norm.bias": "pytorch_model-16-of-32.bin", "model.decoder.layers.16.self_attn.q_proj.weight": "pytorch_model-17-of-32.bin", "model.decoder.layers.16.self_attn.q_proj.bias": "pytorch_model-17-of-32.bin", "model.decoder.layers.16.self_attn.k_proj.weight": "pytorch_model-17-of-32.bin", "model.decoder.layers.16.self_attn.k_proj.bias": "pytorch_model-17-of-32.bin", "model.decoder.layers.16.self_attn.v_proj.weight": "pytorch_model-17-of-32.bin", "model.decoder.layers.16.self_attn.v_proj.bias": "pytorch_model-17-of-32.bin", "model.decoder.layers.16.self_attn.out_proj.weight": "pytorch_model-17-of-32.bin", "model.decoder.layers.16.self_attn.out_proj.bias": "pytorch_model-17-of-32.bin", "model.decoder.layers.16.self_attn_layer_norm.weight": "pytorch_model-17-of-32.bin", "model.decoder.layers.16.self_attn_layer_norm.bias": "pytorch_model-17-of-32.bin", "model.decoder.layers.16.fc1.weight": "pytorch_model-17-of-32.bin", "model.decoder.layers.16.fc1.bias": "pytorch_model-17-of-32.bin", "model.decoder.layers.16.fc2.weight": "pytorch_model-17-of-32.bin", "model.decoder.layers.16.fc2.bias": "pytorch_model-17-of-32.bin", "model.decoder.layers.16.final_layer_norm.weight": "pytorch_model-17-of-32.bin", "model.decoder.layers.16.final_layer_norm.bias": "pytorch_model-17-of-32.bin", "model.decoder.layers.17.self_attn.q_proj.weight": "pytorch_model-18-of-32.bin", 
"model.decoder.layers.17.self_attn.q_proj.bias": "pytorch_model-18-of-32.bin", "model.decoder.layers.17.self_attn.k_proj.weight": "pytorch_model-18-of-32.bin", "model.decoder.layers.17.self_attn.k_proj.bias": "pytorch_model-18-of-32.bin", "model.decoder.layers.17.self_attn.v_proj.weight": "pytorch_model-18-of-32.bin", "model.decoder.layers.17.self_attn.v_proj.bias": "pytorch_model-18-of-32.bin", "model.decoder.layers.17.self_attn.out_proj.weight": "pytorch_model-18-of-32.bin", "model.decoder.layers.17.self_attn.out_proj.bias": "pytorch_model-18-of-32.bin", "model.decoder.layers.17.self_attn_layer_norm.weight": "pytorch_model-18-of-32.bin", "model.decoder.layers.17.self_attn_layer_norm.bias": "pytorch_model-18-of-32.bin", "model.decoder.layers.17.fc1.weight": "pytorch_model-18-of-32.bin", "model.decoder.layers.17.fc1.bias": "pytorch_model-18-of-32.bin", "model.decoder.layers.17.fc2.weight": "pytorch_model-18-of-32.bin", "model.decoder.layers.17.fc2.bias": "pytorch_model-18-of-32.bin", "model.decoder.layers.17.final_layer_norm.weight": "pytorch_model-18-of-32.bin", "model.decoder.layers.17.final_layer_norm.bias": "pytorch_model-18-of-32.bin", "model.decoder.layers.18.self_attn.q_proj.weight": "pytorch_model-19-of-32.bin", "model.decoder.layers.18.self_attn.q_proj.bias": "pytorch_model-19-of-32.bin", "model.decoder.layers.18.self_attn.k_proj.weight": "pytorch_model-19-of-32.bin", "model.decoder.layers.18.self_attn.k_proj.bias": "pytorch_model-19-of-32.bin", "model.decoder.layers.18.self_attn.v_proj.weight": "pytorch_model-19-of-32.bin", "model.decoder.layers.18.self_attn.v_proj.bias": "pytorch_model-19-of-32.bin", "model.decoder.layers.18.self_attn.out_proj.weight": "pytorch_model-19-of-32.bin", "model.decoder.layers.18.self_attn.out_proj.bias": "pytorch_model-19-of-32.bin", "model.decoder.layers.18.self_attn_layer_norm.weight": "pytorch_model-19-of-32.bin", "model.decoder.layers.18.self_attn_layer_norm.bias": "pytorch_model-19-of-32.bin", "model.decoder.layers.18.fc1.weight": "pytorch_model-19-of-32.bin", "model.decoder.layers.18.fc1.bias": "pytorch_model-19-of-32.bin", "model.decoder.layers.18.fc2.weight": "pytorch_model-19-of-32.bin", "model.decoder.layers.18.fc2.bias": "pytorch_model-19-of-32.bin", "model.decoder.layers.18.final_layer_norm.weight": "pytorch_model-19-of-32.bin", "model.decoder.layers.18.final_layer_norm.bias": "pytorch_model-19-of-32.bin", "model.decoder.layers.19.self_attn.q_proj.weight": "pytorch_model-20-of-32.bin", "model.decoder.layers.19.self_attn.q_proj.bias": "pytorch_model-20-of-32.bin", "model.decoder.layers.19.self_attn.k_proj.weight": "pytorch_model-20-of-32.bin", "model.decoder.layers.19.self_attn.k_proj.bias": "pytorch_model-20-of-32.bin", "model.decoder.layers.19.self_attn.v_proj.weight": "pytorch_model-20-of-32.bin", "model.decoder.layers.19.self_attn.v_proj.bias": "pytorch_model-20-of-32.bin", "model.decoder.layers.19.self_attn.out_proj.weight": "pytorch_model-20-of-32.bin", "model.decoder.layers.19.self_attn.out_proj.bias": "pytorch_model-20-of-32.bin", "model.decoder.layers.19.self_attn_layer_norm.weight": "pytorch_model-20-of-32.bin", "model.decoder.layers.19.self_attn_layer_norm.bias": "pytorch_model-20-of-32.bin", "model.decoder.layers.19.fc1.weight": "pytorch_model-20-of-32.bin", "model.decoder.layers.19.fc1.bias": "pytorch_model-20-of-32.bin", "model.decoder.layers.19.fc2.weight": "pytorch_model-20-of-32.bin", "model.decoder.layers.19.fc2.bias": "pytorch_model-20-of-32.bin", "model.decoder.layers.19.final_layer_norm.weight": 
"pytorch_model-20-of-32.bin", "model.decoder.layers.19.final_layer_norm.bias": "pytorch_model-20-of-32.bin", "model.decoder.layers.20.self_attn.q_proj.weight": "pytorch_model-21-of-32.bin", "model.decoder.layers.20.self_attn.q_proj.bias": "pytorch_model-21-of-32.bin", "model.decoder.layers.20.self_attn.k_proj.weight": "pytorch_model-21-of-32.bin", "model.decoder.layers.20.self_attn.k_proj.bias": "pytorch_model-21-of-32.bin", "model.decoder.layers.20.self_attn.v_proj.weight": "pytorch_model-21-of-32.bin", "model.decoder.layers.20.self_attn.v_proj.bias": "pytorch_model-21-of-32.bin", "model.decoder.layers.20.self_attn.out_proj.weight": "pytorch_model-21-of-32.bin", "model.decoder.layers.20.self_attn.out_proj.bias": "pytorch_model-21-of-32.bin", "model.decoder.layers.20.self_attn_layer_norm.weight": "pytorch_model-21-of-32.bin", "model.decoder.layers.20.self_attn_layer_norm.bias": "pytorch_model-21-of-32.bin", "model.decoder.layers.20.fc1.weight": "pytorch_model-21-of-32.bin", "model.decoder.layers.20.fc1.bias": "pytorch_model-21-of-32.bin", "model.decoder.layers.20.fc2.weight": "pytorch_model-21-of-32.bin", "model.decoder.layers.20.fc2.bias": "pytorch_model-21-of-32.bin", "model.decoder.layers.20.final_layer_norm.weight": "pytorch_model-21-of-32.bin", "model.decoder.layers.20.final_layer_norm.bias": "pytorch_model-21-of-32.bin", "model.decoder.layers.21.self_attn.q_proj.weight": "pytorch_model-22-of-32.bin", "model.decoder.layers.21.self_attn.q_proj.bias": "pytorch_model-22-of-32.bin", "model.decoder.layers.21.self_attn.k_proj.weight": "pytorch_model-22-of-32.bin", "model.decoder.layers.21.self_attn.k_proj.bias": "pytorch_model-22-of-32.bin", "model.decoder.layers.21.self_attn.v_proj.weight": "pytorch_model-22-of-32.bin", "model.decoder.layers.21.self_attn.v_proj.bias": "pytorch_model-22-of-32.bin", "model.decoder.layers.21.self_attn.out_proj.weight": "pytorch_model-22-of-32.bin", "model.decoder.layers.21.self_attn.out_proj.bias": "pytorch_model-22-of-32.bin", "model.decoder.layers.21.self_attn_layer_norm.weight": "pytorch_model-22-of-32.bin", "model.decoder.layers.21.self_attn_layer_norm.bias": "pytorch_model-22-of-32.bin", "model.decoder.layers.21.fc1.weight": "pytorch_model-22-of-32.bin", "model.decoder.layers.21.fc1.bias": "pytorch_model-22-of-32.bin", "model.decoder.layers.21.fc2.weight": "pytorch_model-22-of-32.bin", "model.decoder.layers.21.fc2.bias": "pytorch_model-22-of-32.bin", "model.decoder.layers.21.final_layer_norm.weight": "pytorch_model-22-of-32.bin", "model.decoder.layers.21.final_layer_norm.bias": "pytorch_model-22-of-32.bin", "model.decoder.layers.22.self_attn.q_proj.weight": "pytorch_model-23-of-32.bin", "model.decoder.layers.22.self_attn.q_proj.bias": "pytorch_model-23-of-32.bin", "model.decoder.layers.22.self_attn.k_proj.weight": "pytorch_model-23-of-32.bin", "model.decoder.layers.22.self_attn.k_proj.bias": "pytorch_model-23-of-32.bin", "model.decoder.layers.22.self_attn.v_proj.weight": "pytorch_model-23-of-32.bin", "model.decoder.layers.22.self_attn.v_proj.bias": "pytorch_model-23-of-32.bin", "model.decoder.layers.22.self_attn.out_proj.weight": "pytorch_model-23-of-32.bin", "model.decoder.layers.22.self_attn.out_proj.bias": "pytorch_model-23-of-32.bin", "model.decoder.layers.22.self_attn_layer_norm.weight": "pytorch_model-23-of-32.bin", "model.decoder.layers.22.self_attn_layer_norm.bias": "pytorch_model-23-of-32.bin", "model.decoder.layers.22.fc1.weight": "pytorch_model-23-of-32.bin", "model.decoder.layers.22.fc1.bias": "pytorch_model-23-of-32.bin", 
"model.decoder.layers.22.fc2.weight": "pytorch_model-23-of-32.bin", "model.decoder.layers.22.fc2.bias": "pytorch_model-23-of-32.bin", "model.decoder.layers.22.final_layer_norm.weight": "pytorch_model-23-of-32.bin", "model.decoder.layers.22.final_layer_norm.bias": "pytorch_model-23-of-32.bin", "model.decoder.layers.23.self_attn.q_proj.weight": "pytorch_model-24-of-32.bin", "model.decoder.layers.23.self_attn.q_proj.bias": "pytorch_model-24-of-32.bin", "model.decoder.layers.23.self_attn.k_proj.weight": "pytorch_model-24-of-32.bin", "model.decoder.layers.23.self_attn.k_proj.bias": "pytorch_model-24-of-32.bin", "model.decoder.layers.23.self_attn.v_proj.weight": "pytorch_model-24-of-32.bin", "model.decoder.layers.23.self_attn.v_proj.bias": "pytorch_model-24-of-32.bin", "model.decoder.layers.23.self_attn.out_proj.weight": "pytorch_model-24-of-32.bin", "model.decoder.layers.23.self_attn.out_proj.bias": "pytorch_model-24-of-32.bin", "model.decoder.layers.23.self_attn_layer_norm.weight": "pytorch_model-24-of-32.bin", "model.decoder.layers.23.self_attn_layer_norm.bias": "pytorch_model-24-of-32.bin", "model.decoder.layers.23.fc1.weight": "pytorch_model-24-of-32.bin", "model.decoder.layers.23.fc1.bias": "pytorch_model-24-of-32.bin", "model.decoder.layers.23.fc2.weight": "pytorch_model-24-of-32.bin", "model.decoder.layers.23.fc2.bias": "pytorch_model-24-of-32.bin", "model.decoder.layers.23.final_layer_norm.weight": "pytorch_model-24-of-32.bin", "model.decoder.layers.23.final_layer_norm.bias": "pytorch_model-24-of-32.bin", "model.decoder.layers.24.self_attn.q_proj.weight": "pytorch_model-25-of-32.bin", "model.decoder.layers.24.self_attn.q_proj.bias": "pytorch_model-25-of-32.bin", "model.decoder.layers.24.self_attn.k_proj.weight": "pytorch_model-25-of-32.bin", "model.decoder.layers.24.self_attn.k_proj.bias": "pytorch_model-25-of-32.bin", "model.decoder.layers.24.self_attn.v_proj.weight": "pytorch_model-25-of-32.bin", "model.decoder.layers.24.self_attn.v_proj.bias": "pytorch_model-25-of-32.bin", "model.decoder.layers.24.self_attn.out_proj.weight": "pytorch_model-25-of-32.bin", "model.decoder.layers.24.self_attn.out_proj.bias": "pytorch_model-25-of-32.bin", "model.decoder.layers.24.self_attn_layer_norm.weight": "pytorch_model-25-of-32.bin", "model.decoder.layers.24.self_attn_layer_norm.bias": "pytorch_model-25-of-32.bin", "model.decoder.layers.24.fc1.weight": "pytorch_model-25-of-32.bin", "model.decoder.layers.24.fc1.bias": "pytorch_model-25-of-32.bin", "model.decoder.layers.24.fc2.weight": "pytorch_model-25-of-32.bin", "model.decoder.layers.24.fc2.bias": "pytorch_model-25-of-32.bin", "model.decoder.layers.24.final_layer_norm.weight": "pytorch_model-25-of-32.bin", "model.decoder.layers.24.final_layer_norm.bias": "pytorch_model-25-of-32.bin", "model.decoder.layers.25.self_attn.q_proj.weight": "pytorch_model-26-of-32.bin", "model.decoder.layers.25.self_attn.q_proj.bias": "pytorch_model-26-of-32.bin", "model.decoder.layers.25.self_attn.k_proj.weight": "pytorch_model-26-of-32.bin", "model.decoder.layers.25.self_attn.k_proj.bias": "pytorch_model-26-of-32.bin", "model.decoder.layers.25.self_attn.v_proj.weight": "pytorch_model-26-of-32.bin", "model.decoder.layers.25.self_attn.v_proj.bias": "pytorch_model-26-of-32.bin", "model.decoder.layers.25.self_attn.out_proj.weight": "pytorch_model-26-of-32.bin", "model.decoder.layers.25.self_attn.out_proj.bias": "pytorch_model-26-of-32.bin", "model.decoder.layers.25.self_attn_layer_norm.weight": "pytorch_model-26-of-32.bin", 
"model.decoder.layers.25.self_attn_layer_norm.bias": "pytorch_model-26-of-32.bin", "model.decoder.layers.25.fc1.weight": "pytorch_model-26-of-32.bin", "model.decoder.layers.25.fc1.bias": "pytorch_model-26-of-32.bin", "model.decoder.layers.25.fc2.weight": "pytorch_model-26-of-32.bin", "model.decoder.layers.25.fc2.bias": "pytorch_model-26-of-32.bin", "model.decoder.layers.25.final_layer_norm.weight": "pytorch_model-26-of-32.bin", "model.decoder.layers.25.final_layer_norm.bias": "pytorch_model-26-of-32.bin", "model.decoder.layers.26.self_attn.q_proj.weight": "pytorch_model-27-of-32.bin", "model.decoder.layers.26.self_attn.q_proj.bias": "pytorch_model-27-of-32.bin", "model.decoder.layers.26.self_attn.k_proj.weight": "pytorch_model-27-of-32.bin", "model.decoder.layers.26.self_attn.k_proj.bias": "pytorch_model-27-of-32.bin", "model.decoder.layers.26.self_attn.v_proj.weight": "pytorch_model-27-of-32.bin", "model.decoder.layers.26.self_attn.v_proj.bias": "pytorch_model-27-of-32.bin", "model.decoder.layers.26.self_attn.out_proj.weight": "pytorch_model-27-of-32.bin", "model.decoder.layers.26.self_attn.out_proj.bias": "pytorch_model-27-of-32.bin", "model.decoder.layers.26.self_attn_layer_norm.weight": "pytorch_model-27-of-32.bin", "model.decoder.layers.26.self_attn_layer_norm.bias": "pytorch_model-27-of-32.bin", "model.decoder.layers.26.fc1.weight": "pytorch_model-27-of-32.bin", "model.decoder.layers.26.fc1.bias": "pytorch_model-27-of-32.bin", "model.decoder.layers.26.fc2.weight": "pytorch_model-27-of-32.bin", "model.decoder.layers.26.fc2.bias": "pytorch_model-27-of-32.bin", "model.decoder.layers.26.final_layer_norm.weight": "pytorch_model-27-of-32.bin", "model.decoder.layers.26.final_layer_norm.bias": "pytorch_model-27-of-32.bin", "model.decoder.layers.27.self_attn.q_proj.weight": "pytorch_model-28-of-32.bin", "model.decoder.layers.27.self_attn.q_proj.bias": "pytorch_model-28-of-32.bin", "model.decoder.layers.27.self_attn.k_proj.weight": "pytorch_model-28-of-32.bin", "model.decoder.layers.27.self_attn.k_proj.bias": "pytorch_model-28-of-32.bin", "model.decoder.layers.27.self_attn.v_proj.weight": "pytorch_model-28-of-32.bin", "model.decoder.layers.27.self_attn.v_proj.bias": "pytorch_model-28-of-32.bin", "model.decoder.layers.27.self_attn.out_proj.weight": "pytorch_model-28-of-32.bin", "model.decoder.layers.27.self_attn.out_proj.bias": "pytorch_model-28-of-32.bin", "model.decoder.layers.27.self_attn_layer_norm.weight": "pytorch_model-28-of-32.bin", "model.decoder.layers.27.self_attn_layer_norm.bias": "pytorch_model-28-of-32.bin", "model.decoder.layers.27.fc1.weight": "pytorch_model-28-of-32.bin", "model.decoder.layers.27.fc1.bias": "pytorch_model-28-of-32.bin", "model.decoder.layers.27.fc2.weight": "pytorch_model-28-of-32.bin", "model.decoder.layers.27.fc2.bias": "pytorch_model-28-of-32.bin", "model.decoder.layers.27.final_layer_norm.weight": "pytorch_model-28-of-32.bin", "model.decoder.layers.27.final_layer_norm.bias": "pytorch_model-28-of-32.bin", "model.decoder.layers.28.self_attn.q_proj.weight": "pytorch_model-29-of-32.bin", "model.decoder.layers.28.self_attn.q_proj.bias": "pytorch_model-29-of-32.bin", "model.decoder.layers.28.self_attn.k_proj.weight": "pytorch_model-29-of-32.bin", "model.decoder.layers.28.self_attn.k_proj.bias": "pytorch_model-29-of-32.bin", "model.decoder.layers.28.self_attn.v_proj.weight": "pytorch_model-29-of-32.bin", "model.decoder.layers.28.self_attn.v_proj.bias": "pytorch_model-29-of-32.bin", "model.decoder.layers.28.self_attn.out_proj.weight": "pytorch_model-29-of-32.bin", 
"model.decoder.layers.28.self_attn.out_proj.bias": "pytorch_model-29-of-32.bin", "model.decoder.layers.28.self_attn_layer_norm.weight": "pytorch_model-29-of-32.bin", "model.decoder.layers.28.self_attn_layer_norm.bias": "pytorch_model-29-of-32.bin", "model.decoder.layers.28.fc1.weight": "pytorch_model-29-of-32.bin", "model.decoder.layers.28.fc1.bias": "pytorch_model-29-of-32.bin", "model.decoder.layers.28.fc2.weight": "pytorch_model-29-of-32.bin", "model.decoder.layers.28.fc2.bias": "pytorch_model-29-of-32.bin", "model.decoder.layers.28.final_layer_norm.weight": "pytorch_model-29-of-32.bin", "model.decoder.layers.28.final_layer_norm.bias": "pytorch_model-29-of-32.bin", "model.decoder.layers.29.self_attn.q_proj.weight": "pytorch_model-30-of-32.bin", "model.decoder.layers.29.self_attn.q_proj.bias": "pytorch_model-30-of-32.bin", "model.decoder.layers.29.self_attn.k_proj.weight": "pytorch_model-30-of-32.bin", "model.decoder.layers.29.self_attn.k_proj.bias": "pytorch_model-30-of-32.bin", "model.decoder.layers.29.self_attn.v_proj.weight": "pytorch_model-30-of-32.bin", "model.decoder.layers.29.self_attn.v_proj.bias": "pytorch_model-30-of-32.bin", "model.decoder.layers.29.self_attn.out_proj.weight": "pytorch_model-30-of-32.bin", "model.decoder.layers.29.self_attn.out_proj.bias": "pytorch_model-30-of-32.bin", "model.decoder.layers.29.self_attn_layer_norm.weight": "pytorch_model-30-of-32.bin", "model.decoder.layers.29.self_attn_layer_norm.bias": "pytorch_model-30-of-32.bin", "model.decoder.layers.29.fc1.weight": "pytorch_model-30-of-32.bin", "model.decoder.layers.29.fc1.bias": "pytorch_model-30-of-32.bin", "model.decoder.layers.29.fc2.weight": "pytorch_model-30-of-32.bin", "model.decoder.layers.29.fc2.bias": "pytorch_model-30-of-32.bin", "model.decoder.layers.29.final_layer_norm.weight": "pytorch_model-30-of-32.bin", "model.decoder.layers.29.final_layer_norm.bias": "pytorch_model-30-of-32.bin", "model.decoder.layers.30.self_attn.q_proj.weight": "pytorch_model-31-of-32.bin", "model.decoder.layers.30.self_attn.q_proj.bias": "pytorch_model-31-of-32.bin", "model.decoder.layers.30.self_attn.k_proj.weight": "pytorch_model-31-of-32.bin", "model.decoder.layers.30.self_attn.k_proj.bias": "pytorch_model-31-of-32.bin", "model.decoder.layers.30.self_attn.v_proj.weight": "pytorch_model-31-of-32.bin", "model.decoder.layers.30.self_attn.v_proj.bias": "pytorch_model-31-of-32.bin", "model.decoder.layers.30.self_attn.out_proj.weight": "pytorch_model-31-of-32.bin", "model.decoder.layers.30.self_attn.out_proj.bias": "pytorch_model-31-of-32.bin", "model.decoder.layers.30.self_attn_layer_norm.weight": "pytorch_model-31-of-32.bin", "model.decoder.layers.30.self_attn_layer_norm.bias": "pytorch_model-31-of-32.bin", "model.decoder.layers.30.fc1.weight": "pytorch_model-31-of-32.bin", "model.decoder.layers.30.fc1.bias": "pytorch_model-31-of-32.bin", "model.decoder.layers.30.fc2.weight": "pytorch_model-31-of-32.bin", "model.decoder.layers.30.fc2.bias": "pytorch_model-31-of-32.bin", "model.decoder.layers.30.final_layer_norm.weight": "pytorch_model-31-of-32.bin", "model.decoder.layers.30.final_layer_norm.bias": "pytorch_model-31-of-32.bin", "model.decoder.layers.31.self_attn.q_proj.weight": "pytorch_model-32-of-32.bin", "model.decoder.layers.31.self_attn.q_proj.bias": "pytorch_model-32-of-32.bin", "model.decoder.layers.31.self_attn.k_proj.weight": "pytorch_model-32-of-32.bin", "model.decoder.layers.31.self_attn.k_proj.bias": "pytorch_model-32-of-32.bin", "model.decoder.layers.31.self_attn.v_proj.weight": 
"pytorch_model-32-of-32.bin", "model.decoder.layers.31.self_attn.v_proj.bias": "pytorch_model-32-of-32.bin", "model.decoder.layers.31.self_attn.out_proj.weight": "pytorch_model-32-of-32.bin", "model.decoder.layers.31.self_attn.out_proj.bias": "pytorch_model-32-of-32.bin", "model.decoder.layers.31.self_attn_layer_norm.weight": "pytorch_model-32-of-32.bin", "model.decoder.layers.31.self_attn_layer_norm.bias": "pytorch_model-32-of-32.bin", "model.decoder.layers.31.fc1.weight": "pytorch_model-32-of-32.bin", "model.decoder.layers.31.fc1.bias": "pytorch_model-32-of-32.bin", "model.decoder.layers.31.fc2.weight": "pytorch_model-32-of-32.bin", "model.decoder.layers.31.fc2.bias": "pytorch_model-32-of-32.bin", "model.decoder.layers.31.final_layer_norm.weight": "pytorch_model-32-of-32.bin", "model.decoder.layers.31.final_layer_norm.bias": "pytorch_model-32-of-32.bin", "lm_head.weight": "pytorch_model-32-of-32.bin"}}
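The index file maps every parameter name to the shard that stores it, and metadata.total_size records the summed byte count (13724336128 bytes, roughly 12.8 GiB). A sketch of using the weight_map to load just the shard that holds one parameter; the local file names are assumed to match those in this commit:

```python
# Minimal sketch: look up which shard holds a parameter and load only that file.
# Assumes pytorch_model.bin.index.json and the shards sit in the working directory.
import json
import torch

with open("pytorch_model.bin.index.json") as f:
    index = json.load(f)

print(index["metadata"]["total_size"])   # 13724336128 bytes of weights

name = "model.decoder.layers.0.self_attn.q_proj.weight"
shard = index["weight_map"][name]        # -> "pytorch_model-1-of-32.bin"
state_dict = torch.load(shard, map_location="cpu")
print(state_dict[name].shape)            # expected: torch.Size([4096, 4096])
```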
special_tokens_map.json
ADDED
@@ -0,0 +1,6 @@
{
  "bos_token": "<s>",
  "eos_token": "</s>",
  "pad_token": "[PAD]",
  "unk_token": "<unk>"
}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,6 @@
{
  "clean_up_tokenization_spaces": true,
  "model_max_length": 512,
  "padding_side": "right",
  "tokenizer_class": "PreTrainedTokenizerFast"
}
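The tokenizer is a PreTrainedTokenizerFast with right-side padding, a 512-token model_max_length, and the special tokens declared in special_tokens_map.json. A quick check, assuming the files sit in the same hypothetical checkpoint directory used earlier:

```python
# Minimal sketch; same assumed local checkpoint directory as earlier.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./opi_full_tuned_model")
print(tokenizer.bos_token, tokenizer.eos_token, tokenizer.pad_token)  # <s> </s> [PAD]
print(tokenizer.model_max_length, tokenizer.padding_side)             # 512 right
```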
trainer_state.json
ADDED
@@ -0,0 +1,127 @@
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.999934373154245,
  "global_step": 8571,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.18,
      "learning_rate": 1.9420185252014917e-05,
      "loss": 0.5431,
      "step": 500
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.8217250090220138e-05,
      "loss": 0.3568,
      "step": 1000
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.701431492842536e-05,
      "loss": 0.2985,
      "step": 1500
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.581137976663058e-05,
      "loss": 0.2649,
      "step": 2000
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.4608444604835801e-05,
      "loss": 0.241,
      "step": 2500
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.340550944304102e-05,
      "loss": 0.209,
      "step": 3000
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.2202574281246243e-05,
      "loss": 0.1683,
      "step": 3500
    },
    {
      "epoch": 1.4,
      "learning_rate": 1.0999639119451462e-05,
      "loss": 0.1611,
      "step": 4000
    },
    {
      "epoch": 1.58,
      "learning_rate": 9.796703957656684e-06,
      "loss": 0.1536,
      "step": 4500
    },
    {
      "epoch": 1.75,
      "learning_rate": 8.593768795861904e-06,
      "loss": 0.1457,
      "step": 5000
    },
    {
      "epoch": 1.93,
      "learning_rate": 7.390833634067124e-06,
      "loss": 0.141,
      "step": 5500
    },
    {
      "epoch": 2.1,
      "learning_rate": 6.187898472272345e-06,
      "loss": 0.1142,
      "step": 6000
    },
    {
      "epoch": 2.28,
      "learning_rate": 4.9849633104775655e-06,
      "loss": 0.0962,
      "step": 6500
    },
    {
      "epoch": 2.45,
      "learning_rate": 3.7820281486827865e-06,
      "loss": 0.0946,
      "step": 7000
    },
    {
      "epoch": 2.63,
      "learning_rate": 2.579092986888007e-06,
      "loss": 0.091,
      "step": 7500
    },
    {
      "epoch": 2.8,
      "learning_rate": 1.3761578250932275e-06,
      "loss": 0.0891,
      "step": 8000
    },
    {
      "epoch": 2.98,
      "learning_rate": 1.7322266329844823e-07,
      "loss": 0.0877,
      "step": 8500
    },
    {
      "epoch": 3.0,
      "step": 8571,
      "total_flos": 1.8800594957369344e+16,
      "train_loss": 0.19064752925667625,
      "train_runtime": 224598.7373,
      "train_samples_per_second": 19.539,
      "train_steps_per_second": 0.038
    }
  ],
  "max_steps": 8571,
  "num_train_epochs": 3,
  "total_flos": 1.8800594957369344e+16,
  "trial_name": null,
  "trial_params": null
}
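trainer_state.json records a 3-epoch run (8571 steps) with a roughly linearly decaying learning rate and a final mean training loss of about 0.19. A sketch that prints the logged loss curve from this file, assuming it is available locally under the same name:

```python
# Minimal sketch: print the loss/learning-rate trace logged in trainer_state.json.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "loss" in entry:  # the periodic logging entries (the last entry logs train_loss instead)
        print(f'step {entry["step"]:>5}  lr {entry["learning_rate"]:.2e}  loss {entry["loss"]:.4f}')

summary = state["log_history"][-1]
print("train_loss:", summary["train_loss"], " runtime (s):", summary["train_runtime"])
```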
training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1bdd57b3fdca966c3798b3c5d2a963dd81d66fd9d263ec872b80bf98f4056cf0
size 5039
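training_args.bin is the Trainer's pickled TrainingArguments object. It can be inspected with torch.load, with the usual caveat that unpickling should only be done on trusted files; the attribute names printed below are standard TrainingArguments fields, and newer PyTorch releases may additionally require passing weights_only=False. A sketch:

```python
# Minimal sketch: inspect the pickled TrainingArguments saved as training_args.bin.
# Only load from trusted sources; recent torch versions may need weights_only=False.
import torch

args = torch.load("training_args.bin", map_location="cpu")
print(type(args).__name__)  # typically "TrainingArguments" (or a subclass)
print(args.num_train_epochs, args.learning_rate, args.per_device_train_batch_size)
```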