agentlans committed on
Commit
c60f53f
·
verified ·
1 Parent(s): 2a25c01

Upload 13 files

Browse files
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_loss": 2.435030460357666,
4
- "eval_runtime": 2.5197,
5
- "eval_samples": 2500,
6
- "eval_samples_per_second": 992.173,
7
- "eval_steps_per_second": 124.22,
8
- "num_input_tokens_seen": 55653732,
9
- "total_flos": 1.4738832163602432e+16,
10
- "train_loss": 1.7073542784139526,
11
- "train_runtime": 2504.3889,
12
- "train_samples": 248732,
13
- "train_samples_per_second": 297.955,
14
- "train_steps_per_second": 37.245,
15
- "train_tokens_per_second": 22216.164
16
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_loss": 1.8425856828689575,
4
+ "eval_runtime": 78.2516,
5
+ "eval_samples": 83311,
6
+ "eval_samples_per_second": 1064.656,
7
+ "eval_steps_per_second": 133.084,
8
+ "num_input_tokens_seen": 224864981,
9
+ "total_flos": 5.955121238645146e+16,
10
+ "train_loss": 1.7257680629754402,
11
+ "train_runtime": 16392.488,
12
+ "train_samples": 333243,
13
+ "train_samples_per_second": 203.29,
14
+ "train_steps_per_second": 25.412,
15
+ "train_tokens_per_second": 13715.035
16
  }
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_name_or_path": "Helsinki-NLP/opus-mt-zh-en",
3
  "activation_dropout": 0.0,
4
  "activation_function": "swish",
5
  "add_bias_logits": false,
@@ -50,7 +49,7 @@
50
  "share_encoder_decoder_embeddings": true,
51
  "static_position_embeddings": true,
52
  "torch_dtype": "float32",
53
- "transformers_version": "4.48.1",
54
  "use_cache": true,
55
  "vocab_size": 65001
56
  }
 
1
  {
 
2
  "activation_dropout": 0.0,
3
  "activation_function": "swish",
4
  "add_bias_logits": false,
 
49
  "share_encoder_decoder_embeddings": true,
50
  "static_position_embeddings": true,
51
  "torch_dtype": "float32",
52
+ "transformers_version": "4.51.3",
53
  "use_cache": true,
54
  "vocab_size": 65001
55
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_loss": 2.435030460357666,
4
- "eval_runtime": 2.5197,
5
- "eval_samples": 2500,
6
- "eval_samples_per_second": 992.173,
7
- "eval_steps_per_second": 124.22,
8
- "num_input_tokens_seen": 55653732
9
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_loss": 1.8425856828689575,
4
+ "eval_runtime": 78.2516,
5
+ "eval_samples": 83311,
6
+ "eval_samples_per_second": 1064.656,
7
+ "eval_steps_per_second": 133.084,
8
+ "num_input_tokens_seen": 224864981
9
  }
generation_config.json CHANGED
@@ -12,5 +12,5 @@
12
  "num_beams": 6,
13
  "pad_token_id": 65000,
14
  "renormalize_logits": true,
15
- "transformers_version": "4.48.1"
16
  }
 
12
  "num_beams": 6,
13
  "pad_token_id": 65000,
14
  "renormalize_logits": true,
15
+ "transformers_version": "4.51.3"
16
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f894a6d080dfe10bed879a7505ab686823e3d5ba15eed0f44ea70a17462fe4a
3
  size 309965092
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f64d3422f29320e25c3147ccad3fd8c2ff33f21c304ce3fef5ad0635cb65a1b
3
  size 309965092
train_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "epoch": 3.0,
3
- "num_input_tokens_seen": 55653732,
4
- "total_flos": 1.4738832163602432e+16,
5
- "train_loss": 1.7073542784139526,
6
- "train_runtime": 2504.3889,
7
- "train_samples": 248732,
8
- "train_samples_per_second": 297.955,
9
- "train_steps_per_second": 37.245,
10
- "train_tokens_per_second": 22216.164
11
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "num_input_tokens_seen": 224864981,
4
+ "total_flos": 5.955121238645146e+16,
5
+ "train_loss": 1.7257680629754402,
6
+ "train_runtime": 16392.488,
7
+ "train_samples": 333243,
8
+ "train_samples_per_second": 203.29,
9
+ "train_steps_per_second": 25.412,
10
+ "train_tokens_per_second": 13715.035
11
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b6b0e3aff2a6ecffae867eef3409d7416d0e3a25aa9423ea38026d8d618f479
3
- size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e21d4704e53ce765ad8089f6ad1256599250b602615c481e5a598d83131099ba
3
+ size 5496