nadejdatarabukina committed on
Commit
460c2b7
·
verified ·
1 Parent(s): f6449fe

Training in progress, step 10, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c156a3b2974a55d71fcba9fd9590638881a5cc17c5595428dea09a7d7bfc41ce
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f88324378ffd906b01cfaa7bddb64e64d3ef621da69a0c155027f39f38f9a00
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00a1dd1bec282e0f5518af74b2bafe24126d8a3d5a4a96cc714bf3b1c17d4d38
3
  size 335922386
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:260e9c73a274d8e9f45b604e236ac826578cc357b0c954e506fa86dfca05bc3e
3
  size 335922386
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a29876f943925ecf15cbb27013d1531d954db6e6acea114dbaca15ed1a74bf3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba60c260384233e5cb0e7bbfabb0bd75f441fc8b78b5fed933e9ca48341a17ab
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84b73b70b4d55707d0e07cefd5db81f7d92e9aa0ccc0fcacc15d9b1610bf62f6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c188a6a4749e6ca627bb6d536eb7443f499d5b1b88d98a78f9c713443e010d9c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.02336448598130841,
5
  "eval_steps": 8,
6
- "global_step": 5,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -22,6 +22,28 @@
22
  "learning_rate": 0.00012,
23
  "loss": 2.9464,
24
  "step": 3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  }
26
  ],
27
  "logging_steps": 3,
@@ -41,7 +63,7 @@
41
  "attributes": {}
42
  }
43
  },
44
- "total_flos": 1854718208901120.0,
45
  "train_batch_size": 2,
46
  "trial_name": null,
47
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.04672897196261682,
5
  "eval_steps": 8,
6
+ "global_step": 10,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
22
  "learning_rate": 0.00012,
23
  "loss": 2.9464,
24
  "step": 3
25
+ },
26
+ {
27
+ "epoch": 0.028037383177570093,
28
+ "grad_norm": 1.4899441003799438,
29
+ "learning_rate": 0.0001992114701314478,
30
+ "loss": 2.6909,
31
+ "step": 6
32
+ },
33
+ {
34
+ "epoch": 0.037383177570093455,
35
+ "eval_loss": 2.327054977416992,
36
+ "eval_runtime": 11.7416,
37
+ "eval_samples_per_second": 7.75,
38
+ "eval_steps_per_second": 3.918,
39
+ "step": 8
40
+ },
41
+ {
42
+ "epoch": 0.04205607476635514,
43
+ "grad_norm": 1.3411593437194824,
44
+ "learning_rate": 0.00018763066800438636,
45
+ "loss": 2.4553,
46
+ "step": 9
47
  }
48
  ],
49
  "logging_steps": 3,
 
63
  "attributes": {}
64
  }
65
  },
66
+ "total_flos": 3709436417802240.0,
67
  "train_batch_size": 2,
68
  "trial_name": null,
69
  "trial_params": null