Training in progress, epoch 0
Browse files- config.json.sagemaker-uploaded +0 -0
- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- model.safetensors.index.json.sagemaker-uploaded +0 -0
- trainer_log.jsonl +42 -41
- trainer_log.jsonl.sagemaker-uploaded +0 -0
- training_args.bin +2 -2
config.json.sagemaker-uploaded
ADDED
File without changes
|
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4976698672
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:219b3f0f6c9e190d4b29ea23845ae4d1c8d601d70f96a5daa4033ab8ca6c307b
|
3 |
size 4976698672
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999802720
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd9b67ddcbddf961c02bdb634e5ecf053e23d1cd9a3c623415c8ff0746a72135
|
3 |
size 4999802720
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4915916176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b300355db2e4e68e04cef811fd0464f1c6fbfcba40ba8415be90018d4d2cf5bf
|
3 |
size 4915916176
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1168138808
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fb5322467931410af5080e68f16307ff4a5168fa8f7eac8376a7df1a90cae75
|
3 |
size 1168138808
|
model.safetensors.index.json.sagemaker-uploaded
ADDED
File without changes
|
trainer_log.jsonl
CHANGED
@@ -1,41 +1,42 @@
|
|
1 |
-
{"current_steps": 10, "total_steps": 1212, "loss": 0.
|
2 |
-
{"current_steps": 20, "total_steps": 1212, "loss": 0.
|
3 |
-
{"current_steps": 30, "total_steps": 1212, "loss": 0.
|
4 |
-
{"current_steps": 40, "total_steps": 1212, "loss": 0.
|
5 |
-
{"current_steps": 50, "total_steps": 1212, "loss": 0.
|
6 |
-
{"current_steps": 60, "total_steps": 1212, "loss": 0.
|
7 |
-
{"current_steps": 70, "total_steps": 1212, "loss": 0.
|
8 |
-
{"current_steps": 80, "total_steps": 1212, "loss": 0.
|
9 |
-
{"current_steps": 90, "total_steps": 1212, "loss": 0.
|
10 |
-
{"current_steps": 100, "total_steps": 1212, "loss": 0.6307, "learning_rate": 5e-06, "epoch": 0.24737167594310452, "percentage": 8.25, "elapsed_time": "0:
|
11 |
-
{"current_steps": 110, "total_steps": 1212, "loss": 0.
|
12 |
-
{"current_steps": 120, "total_steps": 1212, "loss": 0.
|
13 |
-
{"current_steps": 130, "total_steps": 1212, "loss": 0.
|
14 |
-
{"current_steps": 140, "total_steps": 1212, "loss": 0.
|
15 |
-
{"current_steps": 150, "total_steps": 1212, "loss": 0.6046, "learning_rate": 5e-06, "epoch": 0.37105751391465674, "percentage": 12.38, "elapsed_time": "1:
|
16 |
-
{"current_steps": 160, "total_steps": 1212, "loss": 0.
|
17 |
-
{"current_steps": 170, "total_steps": 1212, "loss": 0.6082, "learning_rate": 5e-06, "epoch": 0.4205318491032777, "percentage": 14.03, "elapsed_time": "1:
|
18 |
-
{"current_steps": 180, "total_steps": 1212, "loss": 0.
|
19 |
-
{"current_steps": 190, "total_steps": 1212, "loss": 0.5994, "learning_rate": 5e-06, "epoch": 0.47000618429189855, "percentage": 15.68, "elapsed_time": "1:
|
20 |
-
{"current_steps": 200, "total_steps": 1212, "loss": 0.
|
21 |
-
{"current_steps": 210, "total_steps": 1212, "loss": 0.
|
22 |
-
{"current_steps": 220, "total_steps": 1212, "loss": 0.5927, "learning_rate": 5e-06, "epoch": 0.54421768707483, "percentage": 18.15, "elapsed_time": "1:
|
23 |
-
{"current_steps": 230, "total_steps": 1212, "loss": 0.5925, "learning_rate": 5e-06, "epoch": 0.5689548546691404, "percentage": 18.98, "elapsed_time": "
|
24 |
-
{"current_steps": 240, "total_steps": 1212, "loss": 0.5928, "learning_rate": 5e-06, "epoch": 0.5936920222634509, "percentage": 19.8, "elapsed_time": "
|
25 |
-
{"current_steps": 250, "total_steps": 1212, "loss": 0.5858, "learning_rate": 5e-06, "epoch": 0.6184291898577613, "percentage": 20.63, "elapsed_time": "2:
|
26 |
-
{"current_steps": 260, "total_steps": 1212, "loss": 0.5879, "learning_rate": 5e-06, "epoch": 0.6431663574520717, "percentage": 21.45, "elapsed_time": "2:
|
27 |
-
{"current_steps": 270, "total_steps": 1212, "loss": 0.5844, "learning_rate": 5e-06, "epoch": 0.6679035250463822, "percentage": 22.28, "elapsed_time": "2:
|
28 |
-
{"current_steps": 280, "total_steps": 1212, "loss": 0.5755, "learning_rate": 5e-06, "epoch": 0.6926406926406926, "percentage": 23.1, "elapsed_time": "2:
|
29 |
-
{"current_steps": 290, "total_steps": 1212, "loss": 0.5843, "learning_rate": 5e-06, "epoch": 0.717377860235003, "percentage": 23.93, "elapsed_time": "2:
|
30 |
-
{"current_steps": 300, "total_steps": 1212, "loss": 0.5864, "learning_rate": 5e-06, "epoch": 0.7421150278293135, "percentage": 24.75, "elapsed_time": "2:
|
31 |
-
{"current_steps": 310, "total_steps": 1212, "loss": 0.5822, "learning_rate": 5e-06, "epoch": 0.766852195423624, "percentage": 25.58, "elapsed_time": "2:
|
32 |
-
{"current_steps": 320, "total_steps": 1212, "loss": 0.5727, "learning_rate": 5e-06, "epoch": 0.7915893630179345, "percentage": 26.4, "elapsed_time": "2:
|
33 |
-
{"current_steps": 330, "total_steps": 1212, "loss": 0.5794, "learning_rate": 5e-06, "epoch": 0.8163265306122449, "percentage": 27.23, "elapsed_time": "2:
|
34 |
-
{"current_steps": 340, "total_steps": 1212, "loss": 0.5793, "learning_rate": 5e-06, "epoch": 0.8410636982065554, "percentage": 28.05, "elapsed_time": "
|
35 |
-
{"current_steps": 350, "total_steps": 1212, "loss": 0.5744, "learning_rate": 5e-06, "epoch": 0.8658008658008658, "percentage": 28.88, "elapsed_time": "
|
36 |
-
{"current_steps": 360, "total_steps": 1212, "loss": 0.5707, "learning_rate": 5e-06, "epoch": 0.8905380333951762, "percentage": 29.7, "elapsed_time": "
|
37 |
-
{"current_steps": 370, "total_steps": 1212, "loss": 0.5765, "learning_rate": 5e-06, "epoch": 0.9152752009894867, "percentage": 30.53, "elapsed_time": "3:
|
38 |
-
{"current_steps": 380, "total_steps": 1212, "loss": 0.5714, "learning_rate": 5e-06, "epoch": 0.9400123685837971, "percentage": 31.35, "elapsed_time": "3:
|
39 |
-
{"current_steps": 390, "total_steps": 1212, "loss": 0.5682, "learning_rate": 5e-06, "epoch": 0.9647495361781077, "percentage": 32.18, "elapsed_time": "3:
|
40 |
-
{"current_steps": 400, "total_steps": 1212, "loss": 0.5719, "learning_rate": 5e-06, "epoch": 0.9894867037724181, "percentage": 33.0, "elapsed_time": "3:
|
41 |
-
{"current_steps": 404, "total_steps": 1212, "eval_loss": 0.
|
|
|
|
1 |
+
{"current_steps": 10, "total_steps": 1212, "loss": 0.8916, "learning_rate": 5e-06, "epoch": 0.024737167594310452, "percentage": 0.83, "elapsed_time": "0:04:56", "remaining_time": "9:53:37"}
|
2 |
+
{"current_steps": 20, "total_steps": 1212, "loss": 0.7677, "learning_rate": 5e-06, "epoch": 0.049474335188620905, "percentage": 1.65, "elapsed_time": "0:09:49", "remaining_time": "9:45:43"}
|
3 |
+
{"current_steps": 30, "total_steps": 1212, "loss": 0.7288, "learning_rate": 5e-06, "epoch": 0.07421150278293136, "percentage": 2.48, "elapsed_time": "0:14:43", "remaining_time": "9:39:51"}
|
4 |
+
{"current_steps": 40, "total_steps": 1212, "loss": 0.7031, "learning_rate": 5e-06, "epoch": 0.09894867037724181, "percentage": 3.3, "elapsed_time": "0:19:36", "remaining_time": "9:34:26"}
|
5 |
+
{"current_steps": 50, "total_steps": 1212, "loss": 0.6844, "learning_rate": 5e-06, "epoch": 0.12368583797155226, "percentage": 4.13, "elapsed_time": "0:24:28", "remaining_time": "9:28:59"}
|
6 |
+
{"current_steps": 60, "total_steps": 1212, "loss": 0.6598, "learning_rate": 5e-06, "epoch": 0.14842300556586271, "percentage": 4.95, "elapsed_time": "0:29:22", "remaining_time": "9:24:01"}
|
7 |
+
{"current_steps": 70, "total_steps": 1212, "loss": 0.6485, "learning_rate": 5e-06, "epoch": 0.17316017316017315, "percentage": 5.78, "elapsed_time": "0:34:16", "remaining_time": "9:19:06"}
|
8 |
+
{"current_steps": 80, "total_steps": 1212, "loss": 0.6398, "learning_rate": 5e-06, "epoch": 0.19789734075448362, "percentage": 6.6, "elapsed_time": "0:39:09", "remaining_time": "9:14:10"}
|
9 |
+
{"current_steps": 90, "total_steps": 1212, "loss": 0.6378, "learning_rate": 5e-06, "epoch": 0.22263450834879406, "percentage": 7.43, "elapsed_time": "0:44:03", "remaining_time": "9:09:17"}
|
10 |
+
{"current_steps": 100, "total_steps": 1212, "loss": 0.6307, "learning_rate": 5e-06, "epoch": 0.24737167594310452, "percentage": 8.25, "elapsed_time": "0:48:57", "remaining_time": "9:04:25"}
|
11 |
+
{"current_steps": 110, "total_steps": 1212, "loss": 0.6176, "learning_rate": 5e-06, "epoch": 0.272108843537415, "percentage": 9.08, "elapsed_time": "0:53:50", "remaining_time": "8:59:27"}
|
12 |
+
{"current_steps": 120, "total_steps": 1212, "loss": 0.6178, "learning_rate": 5e-06, "epoch": 0.29684601113172543, "percentage": 9.9, "elapsed_time": "0:58:43", "remaining_time": "8:54:26"}
|
13 |
+
{"current_steps": 130, "total_steps": 1212, "loss": 0.6133, "learning_rate": 5e-06, "epoch": 0.32158317872603587, "percentage": 10.73, "elapsed_time": "1:03:37", "remaining_time": "8:49:33"}
|
14 |
+
{"current_steps": 140, "total_steps": 1212, "loss": 0.6092, "learning_rate": 5e-06, "epoch": 0.3463203463203463, "percentage": 11.55, "elapsed_time": "1:08:31", "remaining_time": "8:44:40"}
|
15 |
+
{"current_steps": 150, "total_steps": 1212, "loss": 0.6046, "learning_rate": 5e-06, "epoch": 0.37105751391465674, "percentage": 12.38, "elapsed_time": "1:13:24", "remaining_time": "8:39:46"}
|
16 |
+
{"current_steps": 160, "total_steps": 1212, "loss": 0.6009, "learning_rate": 5e-06, "epoch": 0.39579468150896724, "percentage": 13.2, "elapsed_time": "1:18:18", "remaining_time": "8:34:54"}
|
17 |
+
{"current_steps": 170, "total_steps": 1212, "loss": 0.6082, "learning_rate": 5e-06, "epoch": 0.4205318491032777, "percentage": 14.03, "elapsed_time": "1:23:12", "remaining_time": "8:30:00"}
|
18 |
+
{"current_steps": 180, "total_steps": 1212, "loss": 0.6004, "learning_rate": 5e-06, "epoch": 0.4452690166975881, "percentage": 14.85, "elapsed_time": "1:28:05", "remaining_time": "8:25:02"}
|
19 |
+
{"current_steps": 190, "total_steps": 1212, "loss": 0.5994, "learning_rate": 5e-06, "epoch": 0.47000618429189855, "percentage": 15.68, "elapsed_time": "1:32:59", "remaining_time": "8:20:09"}
|
20 |
+
{"current_steps": 200, "total_steps": 1212, "loss": 0.5985, "learning_rate": 5e-06, "epoch": 0.49474335188620905, "percentage": 16.5, "elapsed_time": "1:37:52", "remaining_time": "8:15:16"}
|
21 |
+
{"current_steps": 210, "total_steps": 1212, "loss": 0.6022, "learning_rate": 5e-06, "epoch": 0.5194805194805194, "percentage": 17.33, "elapsed_time": "1:42:46", "remaining_time": "8:10:22"}
|
22 |
+
{"current_steps": 220, "total_steps": 1212, "loss": 0.5927, "learning_rate": 5e-06, "epoch": 0.54421768707483, "percentage": 18.15, "elapsed_time": "1:47:40", "remaining_time": "8:05:29"}
|
23 |
+
{"current_steps": 230, "total_steps": 1212, "loss": 0.5925, "learning_rate": 5e-06, "epoch": 0.5689548546691404, "percentage": 18.98, "elapsed_time": "1:52:33", "remaining_time": "8:00:36"}
|
24 |
+
{"current_steps": 240, "total_steps": 1212, "loss": 0.5928, "learning_rate": 5e-06, "epoch": 0.5936920222634509, "percentage": 19.8, "elapsed_time": "1:57:27", "remaining_time": "7:55:42"}
|
25 |
+
{"current_steps": 250, "total_steps": 1212, "loss": 0.5858, "learning_rate": 5e-06, "epoch": 0.6184291898577613, "percentage": 20.63, "elapsed_time": "2:02:20", "remaining_time": "7:50:45"}
|
26 |
+
{"current_steps": 260, "total_steps": 1212, "loss": 0.5879, "learning_rate": 5e-06, "epoch": 0.6431663574520717, "percentage": 21.45, "elapsed_time": "2:07:14", "remaining_time": "7:45:52"}
|
27 |
+
{"current_steps": 270, "total_steps": 1212, "loss": 0.5844, "learning_rate": 5e-06, "epoch": 0.6679035250463822, "percentage": 22.28, "elapsed_time": "2:12:06", "remaining_time": "7:40:54"}
|
28 |
+
{"current_steps": 280, "total_steps": 1212, "loss": 0.5755, "learning_rate": 5e-06, "epoch": 0.6926406926406926, "percentage": 23.1, "elapsed_time": "2:16:59", "remaining_time": "7:35:59"}
|
29 |
+
{"current_steps": 290, "total_steps": 1212, "loss": 0.5843, "learning_rate": 5e-06, "epoch": 0.717377860235003, "percentage": 23.93, "elapsed_time": "2:21:52", "remaining_time": "7:31:05"}
|
30 |
+
{"current_steps": 300, "total_steps": 1212, "loss": 0.5864, "learning_rate": 5e-06, "epoch": 0.7421150278293135, "percentage": 24.75, "elapsed_time": "2:26:46", "remaining_time": "7:26:11"}
|
31 |
+
{"current_steps": 310, "total_steps": 1212, "loss": 0.5822, "learning_rate": 5e-06, "epoch": 0.766852195423624, "percentage": 25.58, "elapsed_time": "2:31:40", "remaining_time": "7:21:19"}
|
32 |
+
{"current_steps": 320, "total_steps": 1212, "loss": 0.5727, "learning_rate": 5e-06, "epoch": 0.7915893630179345, "percentage": 26.4, "elapsed_time": "2:36:34", "remaining_time": "7:16:27"}
|
33 |
+
{"current_steps": 330, "total_steps": 1212, "loss": 0.5794, "learning_rate": 5e-06, "epoch": 0.8163265306122449, "percentage": 27.23, "elapsed_time": "2:41:28", "remaining_time": "7:11:34"}
|
34 |
+
{"current_steps": 340, "total_steps": 1212, "loss": 0.5793, "learning_rate": 5e-06, "epoch": 0.8410636982065554, "percentage": 28.05, "elapsed_time": "2:46:21", "remaining_time": "7:06:40"}
|
35 |
+
{"current_steps": 350, "total_steps": 1212, "loss": 0.5744, "learning_rate": 5e-06, "epoch": 0.8658008658008658, "percentage": 28.88, "elapsed_time": "2:51:14", "remaining_time": "7:01:45"}
|
36 |
+
{"current_steps": 360, "total_steps": 1212, "loss": 0.5707, "learning_rate": 5e-06, "epoch": 0.8905380333951762, "percentage": 29.7, "elapsed_time": "2:56:08", "remaining_time": "6:56:52"}
|
37 |
+
{"current_steps": 370, "total_steps": 1212, "loss": 0.5765, "learning_rate": 5e-06, "epoch": 0.9152752009894867, "percentage": 30.53, "elapsed_time": "3:01:02", "remaining_time": "6:51:59"}
|
38 |
+
{"current_steps": 380, "total_steps": 1212, "loss": 0.5714, "learning_rate": 5e-06, "epoch": 0.9400123685837971, "percentage": 31.35, "elapsed_time": "3:05:55", "remaining_time": "6:47:05"}
|
39 |
+
{"current_steps": 390, "total_steps": 1212, "loss": 0.5682, "learning_rate": 5e-06, "epoch": 0.9647495361781077, "percentage": 32.18, "elapsed_time": "3:10:48", "remaining_time": "6:42:10"}
|
40 |
+
{"current_steps": 400, "total_steps": 1212, "loss": 0.5719, "learning_rate": 5e-06, "epoch": 0.9894867037724181, "percentage": 33.0, "elapsed_time": "3:15:42", "remaining_time": "6:37:17"}
|
41 |
+
{"current_steps": 404, "total_steps": 1212, "eval_loss": 0.5683358907699585, "epoch": 0.9993815708101422, "percentage": 33.33, "elapsed_time": "3:21:25", "remaining_time": "6:42:50"}
|
42 |
+
{"current_steps": 410, "total_steps": 1212, "loss": 0.547, "learning_rate": 5e-06, "epoch": 1.0142238713667284, "percentage": 33.83, "elapsed_time": "3:25:11", "remaining_time": "6:41:22"}
|
trainer_log.jsonl.sagemaker-uploaded
ADDED
File without changes
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70387208c0b4e93addc06d25974d7a6a16aa3adb7cdc4104a2a4e9e559336783
|
3 |
+
size 7160
|