sedrickkeh commited on
Commit
e6811d5
·
verified ·
1 Parent(s): f1f2176

Training in progress, epoch 0

Browse files
config.json.sagemaker-uploaded ADDED
File without changes
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2853247b16d30a1980db2233455967087b0027adc381f4e9a76ebbf0b035e8be
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:219b3f0f6c9e190d4b29ea23845ae4d1c8d601d70f96a5daa4033ab8ca6c307b
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17f0755499acb0f123914a4950b2ba17f9fc7b3973bb7da3618674ce1e6aee11
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd9b67ddcbddf961c02bdb634e5ecf053e23d1cd9a3c623415c8ff0746a72135
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17eb9806d26a3b917ebf98c4e8e15e23b5de00745d2248fa85a17c0f3c1f4287
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b300355db2e4e68e04cef811fd0464f1c6fbfcba40ba8415be90018d4d2cf5bf
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e901c9e9c7293d8c5ce4b6d21e14b5a2e1b399f7ae055310f663e1e411069b6
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fb5322467931410af5080e68f16307ff4a5168fa8f7eac8376a7df1a90cae75
3
  size 1168138808
model.safetensors.index.json.sagemaker-uploaded ADDED
File without changes
trainer_log.jsonl CHANGED
@@ -1,41 +1,42 @@
1
- {"current_steps": 10, "total_steps": 1212, "loss": 0.8929, "learning_rate": 5e-06, "epoch": 0.024737167594310452, "percentage": 0.83, "elapsed_time": "0:05:28", "remaining_time": "10:58:21"}
2
- {"current_steps": 20, "total_steps": 1212, "loss": 0.7739, "learning_rate": 5e-06, "epoch": 0.049474335188620905, "percentage": 1.65, "elapsed_time": "0:10:48", "remaining_time": "10:44:06"}
3
- {"current_steps": 30, "total_steps": 1212, "loss": 0.7286, "learning_rate": 5e-06, "epoch": 0.07421150278293136, "percentage": 2.48, "elapsed_time": "0:16:08", "remaining_time": "10:36:17"}
4
- {"current_steps": 40, "total_steps": 1212, "loss": 0.6964, "learning_rate": 5e-06, "epoch": 0.09894867037724181, "percentage": 3.3, "elapsed_time": "0:21:40", "remaining_time": "10:34:59"}
5
- {"current_steps": 50, "total_steps": 1212, "loss": 0.6808, "learning_rate": 5e-06, "epoch": 0.12368583797155226, "percentage": 4.13, "elapsed_time": "0:27:04", "remaining_time": "10:29:09"}
6
- {"current_steps": 60, "total_steps": 1212, "loss": 0.6577, "learning_rate": 5e-06, "epoch": 0.14842300556586271, "percentage": 4.95, "elapsed_time": "0:32:29", "remaining_time": "10:23:44"}
7
- {"current_steps": 70, "total_steps": 1212, "loss": 0.6476, "learning_rate": 5e-06, "epoch": 0.17316017316017315, "percentage": 5.78, "elapsed_time": "0:37:48", "remaining_time": "10:16:42"}
8
- {"current_steps": 80, "total_steps": 1212, "loss": 0.6395, "learning_rate": 5e-06, "epoch": 0.19789734075448362, "percentage": 6.6, "elapsed_time": "0:43:09", "remaining_time": "10:10:45"}
9
- {"current_steps": 90, "total_steps": 1212, "loss": 0.6376, "learning_rate": 5e-06, "epoch": 0.22263450834879406, "percentage": 7.43, "elapsed_time": "0:48:36", "remaining_time": "10:05:59"}
10
- {"current_steps": 100, "total_steps": 1212, "loss": 0.6307, "learning_rate": 5e-06, "epoch": 0.24737167594310452, "percentage": 8.25, "elapsed_time": "0:53:55", "remaining_time": "9:59:37"}
11
- {"current_steps": 110, "total_steps": 1212, "loss": 0.6175, "learning_rate": 5e-06, "epoch": 0.272108843537415, "percentage": 9.08, "elapsed_time": "0:59:16", "remaining_time": "9:53:46"}
12
- {"current_steps": 120, "total_steps": 1212, "loss": 0.6177, "learning_rate": 5e-06, "epoch": 0.29684601113172543, "percentage": 9.9, "elapsed_time": "1:04:36", "remaining_time": "9:47:59"}
13
- {"current_steps": 130, "total_steps": 1212, "loss": 0.6132, "learning_rate": 5e-06, "epoch": 0.32158317872603587, "percentage": 10.73, "elapsed_time": "1:09:55", "remaining_time": "9:42:01"}
14
- {"current_steps": 140, "total_steps": 1212, "loss": 0.6091, "learning_rate": 5e-06, "epoch": 0.3463203463203463, "percentage": 11.55, "elapsed_time": "1:15:17", "remaining_time": "9:36:27"}
15
- {"current_steps": 150, "total_steps": 1212, "loss": 0.6046, "learning_rate": 5e-06, "epoch": 0.37105751391465674, "percentage": 12.38, "elapsed_time": "1:20:36", "remaining_time": "9:30:42"}
16
- {"current_steps": 160, "total_steps": 1212, "loss": 0.601, "learning_rate": 5e-06, "epoch": 0.39579468150896724, "percentage": 13.2, "elapsed_time": "1:25:59", "remaining_time": "9:25:24"}
17
- {"current_steps": 170, "total_steps": 1212, "loss": 0.6082, "learning_rate": 5e-06, "epoch": 0.4205318491032777, "percentage": 14.03, "elapsed_time": "1:31:20", "remaining_time": "9:19:51"}
18
- {"current_steps": 180, "total_steps": 1212, "loss": 0.6003, "learning_rate": 5e-06, "epoch": 0.4452690166975881, "percentage": 14.85, "elapsed_time": "1:36:39", "remaining_time": "9:14:08"}
19
- {"current_steps": 190, "total_steps": 1212, "loss": 0.5994, "learning_rate": 5e-06, "epoch": 0.47000618429189855, "percentage": 15.68, "elapsed_time": "1:42:02", "remaining_time": "9:08:50"}
20
- {"current_steps": 200, "total_steps": 1212, "loss": 0.5986, "learning_rate": 5e-06, "epoch": 0.49474335188620905, "percentage": 16.5, "elapsed_time": "1:47:28", "remaining_time": "9:03:48"}
21
- {"current_steps": 210, "total_steps": 1212, "loss": 0.6023, "learning_rate": 5e-06, "epoch": 0.5194805194805194, "percentage": 17.33, "elapsed_time": "1:52:54", "remaining_time": "8:58:44"}
22
- {"current_steps": 220, "total_steps": 1212, "loss": 0.5927, "learning_rate": 5e-06, "epoch": 0.54421768707483, "percentage": 18.15, "elapsed_time": "1:58:20", "remaining_time": "8:53:37"}
23
- {"current_steps": 230, "total_steps": 1212, "loss": 0.5925, "learning_rate": 5e-06, "epoch": 0.5689548546691404, "percentage": 18.98, "elapsed_time": "2:03:47", "remaining_time": "8:48:30"}
24
- {"current_steps": 240, "total_steps": 1212, "loss": 0.5928, "learning_rate": 5e-06, "epoch": 0.5936920222634509, "percentage": 19.8, "elapsed_time": "2:09:13", "remaining_time": "8:43:21"}
25
- {"current_steps": 250, "total_steps": 1212, "loss": 0.5858, "learning_rate": 5e-06, "epoch": 0.6184291898577613, "percentage": 20.63, "elapsed_time": "2:14:36", "remaining_time": "8:37:58"}
26
- {"current_steps": 260, "total_steps": 1212, "loss": 0.5879, "learning_rate": 5e-06, "epoch": 0.6431663574520717, "percentage": 21.45, "elapsed_time": "2:20:03", "remaining_time": "8:32:48"}
27
- {"current_steps": 270, "total_steps": 1212, "loss": 0.5844, "learning_rate": 5e-06, "epoch": 0.6679035250463822, "percentage": 22.28, "elapsed_time": "2:25:29", "remaining_time": "8:27:35"}
28
- {"current_steps": 280, "total_steps": 1212, "loss": 0.5755, "learning_rate": 5e-06, "epoch": 0.6926406926406926, "percentage": 23.1, "elapsed_time": "2:30:55", "remaining_time": "8:22:23"}
29
- {"current_steps": 290, "total_steps": 1212, "loss": 0.5843, "learning_rate": 5e-06, "epoch": 0.717377860235003, "percentage": 23.93, "elapsed_time": "2:36:22", "remaining_time": "8:17:09"}
30
- {"current_steps": 300, "total_steps": 1212, "loss": 0.5864, "learning_rate": 5e-06, "epoch": 0.7421150278293135, "percentage": 24.75, "elapsed_time": "2:41:49", "remaining_time": "8:11:56"}
31
- {"current_steps": 310, "total_steps": 1212, "loss": 0.5822, "learning_rate": 5e-06, "epoch": 0.766852195423624, "percentage": 25.58, "elapsed_time": "2:47:16", "remaining_time": "8:06:43"}
32
- {"current_steps": 320, "total_steps": 1212, "loss": 0.5727, "learning_rate": 5e-06, "epoch": 0.7915893630179345, "percentage": 26.4, "elapsed_time": "2:52:42", "remaining_time": "8:01:26"}
33
- {"current_steps": 330, "total_steps": 1212, "loss": 0.5794, "learning_rate": 5e-06, "epoch": 0.8163265306122449, "percentage": 27.23, "elapsed_time": "2:58:09", "remaining_time": "7:56:09"}
34
- {"current_steps": 340, "total_steps": 1212, "loss": 0.5793, "learning_rate": 5e-06, "epoch": 0.8410636982065554, "percentage": 28.05, "elapsed_time": "3:03:36", "remaining_time": "7:50:53"}
35
- {"current_steps": 350, "total_steps": 1212, "loss": 0.5744, "learning_rate": 5e-06, "epoch": 0.8658008658008658, "percentage": 28.88, "elapsed_time": "3:09:03", "remaining_time": "7:45:36"}
36
- {"current_steps": 360, "total_steps": 1212, "loss": 0.5707, "learning_rate": 5e-06, "epoch": 0.8905380333951762, "percentage": 29.7, "elapsed_time": "3:14:28", "remaining_time": "7:40:16"}
37
- {"current_steps": 370, "total_steps": 1212, "loss": 0.5765, "learning_rate": 5e-06, "epoch": 0.9152752009894867, "percentage": 30.53, "elapsed_time": "3:19:49", "remaining_time": "7:34:44"}
38
- {"current_steps": 380, "total_steps": 1212, "loss": 0.5714, "learning_rate": 5e-06, "epoch": 0.9400123685837971, "percentage": 31.35, "elapsed_time": "3:25:09", "remaining_time": "7:29:10"}
39
- {"current_steps": 390, "total_steps": 1212, "loss": 0.5682, "learning_rate": 5e-06, "epoch": 0.9647495361781077, "percentage": 32.18, "elapsed_time": "3:30:32", "remaining_time": "7:23:45"}
40
- {"current_steps": 400, "total_steps": 1212, "loss": 0.5719, "learning_rate": 5e-06, "epoch": 0.9894867037724181, "percentage": 33.0, "elapsed_time": "3:35:56", "remaining_time": "7:18:22"}
41
- {"current_steps": 404, "total_steps": 1212, "eval_loss": 0.5685587525367737, "epoch": 0.9993815708101422, "percentage": 33.33, "elapsed_time": "3:43:01", "remaining_time": "7:26:02"}
 
 
1
+ {"current_steps": 10, "total_steps": 1212, "loss": 0.8916, "learning_rate": 5e-06, "epoch": 0.024737167594310452, "percentage": 0.83, "elapsed_time": "0:04:56", "remaining_time": "9:53:37"}
2
+ {"current_steps": 20, "total_steps": 1212, "loss": 0.7677, "learning_rate": 5e-06, "epoch": 0.049474335188620905, "percentage": 1.65, "elapsed_time": "0:09:49", "remaining_time": "9:45:43"}
3
+ {"current_steps": 30, "total_steps": 1212, "loss": 0.7288, "learning_rate": 5e-06, "epoch": 0.07421150278293136, "percentage": 2.48, "elapsed_time": "0:14:43", "remaining_time": "9:39:51"}
4
+ {"current_steps": 40, "total_steps": 1212, "loss": 0.7031, "learning_rate": 5e-06, "epoch": 0.09894867037724181, "percentage": 3.3, "elapsed_time": "0:19:36", "remaining_time": "9:34:26"}
5
+ {"current_steps": 50, "total_steps": 1212, "loss": 0.6844, "learning_rate": 5e-06, "epoch": 0.12368583797155226, "percentage": 4.13, "elapsed_time": "0:24:28", "remaining_time": "9:28:59"}
6
+ {"current_steps": 60, "total_steps": 1212, "loss": 0.6598, "learning_rate": 5e-06, "epoch": 0.14842300556586271, "percentage": 4.95, "elapsed_time": "0:29:22", "remaining_time": "9:24:01"}
7
+ {"current_steps": 70, "total_steps": 1212, "loss": 0.6485, "learning_rate": 5e-06, "epoch": 0.17316017316017315, "percentage": 5.78, "elapsed_time": "0:34:16", "remaining_time": "9:19:06"}
8
+ {"current_steps": 80, "total_steps": 1212, "loss": 0.6398, "learning_rate": 5e-06, "epoch": 0.19789734075448362, "percentage": 6.6, "elapsed_time": "0:39:09", "remaining_time": "9:14:10"}
9
+ {"current_steps": 90, "total_steps": 1212, "loss": 0.6378, "learning_rate": 5e-06, "epoch": 0.22263450834879406, "percentage": 7.43, "elapsed_time": "0:44:03", "remaining_time": "9:09:17"}
10
+ {"current_steps": 100, "total_steps": 1212, "loss": 0.6307, "learning_rate": 5e-06, "epoch": 0.24737167594310452, "percentage": 8.25, "elapsed_time": "0:48:57", "remaining_time": "9:04:25"}
11
+ {"current_steps": 110, "total_steps": 1212, "loss": 0.6176, "learning_rate": 5e-06, "epoch": 0.272108843537415, "percentage": 9.08, "elapsed_time": "0:53:50", "remaining_time": "8:59:27"}
12
+ {"current_steps": 120, "total_steps": 1212, "loss": 0.6178, "learning_rate": 5e-06, "epoch": 0.29684601113172543, "percentage": 9.9, "elapsed_time": "0:58:43", "remaining_time": "8:54:26"}
13
+ {"current_steps": 130, "total_steps": 1212, "loss": 0.6133, "learning_rate": 5e-06, "epoch": 0.32158317872603587, "percentage": 10.73, "elapsed_time": "1:03:37", "remaining_time": "8:49:33"}
14
+ {"current_steps": 140, "total_steps": 1212, "loss": 0.6092, "learning_rate": 5e-06, "epoch": 0.3463203463203463, "percentage": 11.55, "elapsed_time": "1:08:31", "remaining_time": "8:44:40"}
15
+ {"current_steps": 150, "total_steps": 1212, "loss": 0.6046, "learning_rate": 5e-06, "epoch": 0.37105751391465674, "percentage": 12.38, "elapsed_time": "1:13:24", "remaining_time": "8:39:46"}
16
+ {"current_steps": 160, "total_steps": 1212, "loss": 0.6009, "learning_rate": 5e-06, "epoch": 0.39579468150896724, "percentage": 13.2, "elapsed_time": "1:18:18", "remaining_time": "8:34:54"}
17
+ {"current_steps": 170, "total_steps": 1212, "loss": 0.6082, "learning_rate": 5e-06, "epoch": 0.4205318491032777, "percentage": 14.03, "elapsed_time": "1:23:12", "remaining_time": "8:30:00"}
18
+ {"current_steps": 180, "total_steps": 1212, "loss": 0.6004, "learning_rate": 5e-06, "epoch": 0.4452690166975881, "percentage": 14.85, "elapsed_time": "1:28:05", "remaining_time": "8:25:02"}
19
+ {"current_steps": 190, "total_steps": 1212, "loss": 0.5994, "learning_rate": 5e-06, "epoch": 0.47000618429189855, "percentage": 15.68, "elapsed_time": "1:32:59", "remaining_time": "8:20:09"}
20
+ {"current_steps": 200, "total_steps": 1212, "loss": 0.5985, "learning_rate": 5e-06, "epoch": 0.49474335188620905, "percentage": 16.5, "elapsed_time": "1:37:52", "remaining_time": "8:15:16"}
21
+ {"current_steps": 210, "total_steps": 1212, "loss": 0.6022, "learning_rate": 5e-06, "epoch": 0.5194805194805194, "percentage": 17.33, "elapsed_time": "1:42:46", "remaining_time": "8:10:22"}
22
+ {"current_steps": 220, "total_steps": 1212, "loss": 0.5927, "learning_rate": 5e-06, "epoch": 0.54421768707483, "percentage": 18.15, "elapsed_time": "1:47:40", "remaining_time": "8:05:29"}
23
+ {"current_steps": 230, "total_steps": 1212, "loss": 0.5925, "learning_rate": 5e-06, "epoch": 0.5689548546691404, "percentage": 18.98, "elapsed_time": "1:52:33", "remaining_time": "8:00:36"}
24
+ {"current_steps": 240, "total_steps": 1212, "loss": 0.5928, "learning_rate": 5e-06, "epoch": 0.5936920222634509, "percentage": 19.8, "elapsed_time": "1:57:27", "remaining_time": "7:55:42"}
25
+ {"current_steps": 250, "total_steps": 1212, "loss": 0.5858, "learning_rate": 5e-06, "epoch": 0.6184291898577613, "percentage": 20.63, "elapsed_time": "2:02:20", "remaining_time": "7:50:45"}
26
+ {"current_steps": 260, "total_steps": 1212, "loss": 0.5879, "learning_rate": 5e-06, "epoch": 0.6431663574520717, "percentage": 21.45, "elapsed_time": "2:07:14", "remaining_time": "7:45:52"}
27
+ {"current_steps": 270, "total_steps": 1212, "loss": 0.5844, "learning_rate": 5e-06, "epoch": 0.6679035250463822, "percentage": 22.28, "elapsed_time": "2:12:06", "remaining_time": "7:40:54"}
28
+ {"current_steps": 280, "total_steps": 1212, "loss": 0.5755, "learning_rate": 5e-06, "epoch": 0.6926406926406926, "percentage": 23.1, "elapsed_time": "2:16:59", "remaining_time": "7:35:59"}
29
+ {"current_steps": 290, "total_steps": 1212, "loss": 0.5843, "learning_rate": 5e-06, "epoch": 0.717377860235003, "percentage": 23.93, "elapsed_time": "2:21:52", "remaining_time": "7:31:05"}
30
+ {"current_steps": 300, "total_steps": 1212, "loss": 0.5864, "learning_rate": 5e-06, "epoch": 0.7421150278293135, "percentage": 24.75, "elapsed_time": "2:26:46", "remaining_time": "7:26:11"}
31
+ {"current_steps": 310, "total_steps": 1212, "loss": 0.5822, "learning_rate": 5e-06, "epoch": 0.766852195423624, "percentage": 25.58, "elapsed_time": "2:31:40", "remaining_time": "7:21:19"}
32
+ {"current_steps": 320, "total_steps": 1212, "loss": 0.5727, "learning_rate": 5e-06, "epoch": 0.7915893630179345, "percentage": 26.4, "elapsed_time": "2:36:34", "remaining_time": "7:16:27"}
33
+ {"current_steps": 330, "total_steps": 1212, "loss": 0.5794, "learning_rate": 5e-06, "epoch": 0.8163265306122449, "percentage": 27.23, "elapsed_time": "2:41:28", "remaining_time": "7:11:34"}
34
+ {"current_steps": 340, "total_steps": 1212, "loss": 0.5793, "learning_rate": 5e-06, "epoch": 0.8410636982065554, "percentage": 28.05, "elapsed_time": "2:46:21", "remaining_time": "7:06:40"}
35
+ {"current_steps": 350, "total_steps": 1212, "loss": 0.5744, "learning_rate": 5e-06, "epoch": 0.8658008658008658, "percentage": 28.88, "elapsed_time": "2:51:14", "remaining_time": "7:01:45"}
36
+ {"current_steps": 360, "total_steps": 1212, "loss": 0.5707, "learning_rate": 5e-06, "epoch": 0.8905380333951762, "percentage": 29.7, "elapsed_time": "2:56:08", "remaining_time": "6:56:52"}
37
+ {"current_steps": 370, "total_steps": 1212, "loss": 0.5765, "learning_rate": 5e-06, "epoch": 0.9152752009894867, "percentage": 30.53, "elapsed_time": "3:01:02", "remaining_time": "6:51:59"}
38
+ {"current_steps": 380, "total_steps": 1212, "loss": 0.5714, "learning_rate": 5e-06, "epoch": 0.9400123685837971, "percentage": 31.35, "elapsed_time": "3:05:55", "remaining_time": "6:47:05"}
39
+ {"current_steps": 390, "total_steps": 1212, "loss": 0.5682, "learning_rate": 5e-06, "epoch": 0.9647495361781077, "percentage": 32.18, "elapsed_time": "3:10:48", "remaining_time": "6:42:10"}
40
+ {"current_steps": 400, "total_steps": 1212, "loss": 0.5719, "learning_rate": 5e-06, "epoch": 0.9894867037724181, "percentage": 33.0, "elapsed_time": "3:15:42", "remaining_time": "6:37:17"}
41
+ {"current_steps": 404, "total_steps": 1212, "eval_loss": 0.5683358907699585, "epoch": 0.9993815708101422, "percentage": 33.33, "elapsed_time": "3:21:25", "remaining_time": "6:42:50"}
42
+ {"current_steps": 410, "total_steps": 1212, "loss": 0.547, "learning_rate": 5e-06, "epoch": 1.0142238713667284, "percentage": 33.83, "elapsed_time": "3:25:11", "remaining_time": "6:41:22"}
trainer_log.jsonl.sagemaker-uploaded ADDED
File without changes
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8969b3293161c8ee2a1253f06434e669b79d8ed99971cfc66707afb628f44839
3
- size 7224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70387208c0b4e93addc06d25974d7a6a16aa3adb7cdc4104a2a4e9e559336783
3
+ size 7160