Xtracta-Qiming committed on
Commit
12accda
·
verified ·
1 Parent(s): bb49295

Training in progress, step 10

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:273d7c37129876c98f71bd168ef60b1efe986b88b1c9a670861b78c19e5e57e0
3
  size 4372840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11b2235d91cfd06eee468eb8d63dfafcd75cbb75852a42bd2e08b81809d1adaa
3
  size 4372840
all_results.json CHANGED
@@ -1,8 +1,16 @@
1
  {
2
- "eval_loss": 0.08018716424703598,
 
3
  "eval_model_preparation_time": 0.0126,
4
- "eval_runtime": 21.8993,
5
  "eval_samples": 7,
6
- "eval_samples_per_second": 0.32,
7
- "eval_steps_per_second": 0.32
 
 
 
 
 
 
 
8
  }
 
1
  {
2
+ "epoch": 2.8363636363636364,
3
+ "eval_loss": 0.02887091226875782,
4
  "eval_model_preparation_time": 0.0126,
5
+ "eval_runtime": 67.8206,
6
  "eval_samples": 7,
7
+ "eval_samples_per_second": 0.324,
8
+ "eval_steps_per_second": 0.324,
9
+ "test_samples": 22,
10
+ "total_flos": 3.417506073386496e+16,
11
+ "train_loss": 0.05125234333368448,
12
+ "train_runtime": 1358.5629,
13
+ "train_samples": 110,
14
+ "train_samples_per_second": 0.243,
15
+ "train_steps_per_second": 0.029
16
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "eval_loss": 0.08018716424703598,
3
- "eval_model_preparation_time": 0.0126,
4
- "eval_runtime": 21.8993,
5
  "eval_samples": 7,
6
- "eval_samples_per_second": 0.32,
7
- "eval_steps_per_second": 0.32
8
  }
 
1
  {
2
+ "epoch": 2.8363636363636364,
3
+ "eval_loss": 0.03865480050444603,
4
+ "eval_runtime": 21.1079,
5
  "eval_samples": 7,
6
+ "eval_samples_per_second": 0.332,
7
+ "eval_steps_per_second": 0.332
8
  }
test_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.8363636363636364,
3
+ "eval_loss": 0.02887091226875782,
4
+ "eval_runtime": 67.8206,
5
+ "eval_samples_per_second": 0.324,
6
+ "eval_steps_per_second": 0.324,
7
+ "test_samples": 22
8
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.8363636363636364,
3
+ "total_flos": 3.417506073386496e+16,
4
+ "train_loss": 0.05125234333368448,
5
+ "train_runtime": 1358.5629,
6
+ "train_samples": 110,
7
+ "train_samples_per_second": 0.243,
8
+ "train_steps_per_second": 0.029
9
+ }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.12545762956142426,
3
- "best_model_checkpoint": "/home/paperspace/mmdoc/outputs/Damarcode_AB_v1_20241129_qwen_v2/checkpoint-30",
4
  "epoch": 2.8363636363636364,
5
  "eval_steps": 10,
6
  "global_step": 39,
@@ -10,48 +10,57 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.7272727272727273,
13
- "grad_norm": 1.2174557447433472,
14
  "learning_rate": 0.0002,
15
- "loss": 0.4848,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.7272727272727273,
20
- "eval_loss": 0.377216100692749,
21
- "eval_runtime": 21.2505,
22
- "eval_samples_per_second": 0.329,
23
- "eval_steps_per_second": 0.329,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 1.4545454545454546,
28
- "grad_norm": 0.7148427963256836,
29
  "learning_rate": 0.0002,
30
- "loss": 0.2937,
31
  "step": 20
32
  },
33
  {
34
  "epoch": 1.4545454545454546,
35
- "eval_loss": 0.22557333111763,
36
- "eval_runtime": 21.2706,
37
- "eval_samples_per_second": 0.329,
38
- "eval_steps_per_second": 0.329,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 2.1818181818181817,
43
- "grad_norm": 3.1815290451049805,
44
  "learning_rate": 0.0002,
45
- "loss": 0.1886,
46
  "step": 30
47
  },
48
  {
49
  "epoch": 2.1818181818181817,
50
- "eval_loss": 0.12545762956142426,
51
- "eval_runtime": 21.2759,
52
  "eval_samples_per_second": 0.329,
53
  "eval_steps_per_second": 0.329,
54
  "step": 30
 
 
 
 
 
 
 
 
 
55
  }
56
  ],
57
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.03865480050444603,
3
+ "best_model_checkpoint": "/home/paperspace/mmdoc/outputs/Damarcode_AB_v1_20241129_qwen_v2/checkpoint-39/checkpoint-30",
4
  "epoch": 2.8363636363636364,
5
  "eval_steps": 10,
6
  "global_step": 39,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.7272727272727273,
13
+ "grad_norm": 0.19426245987415314,
14
  "learning_rate": 0.0002,
15
+ "loss": 0.0774,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.7272727272727273,
20
+ "eval_loss": 0.060963425785303116,
21
+ "eval_runtime": 21.0525,
22
+ "eval_samples_per_second": 0.333,
23
+ "eval_steps_per_second": 0.333,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 1.4545454545454546,
28
+ "grad_norm": 0.21388182044029236,
29
  "learning_rate": 0.0002,
30
+ "loss": 0.04,
31
  "step": 20
32
  },
33
  {
34
  "epoch": 1.4545454545454546,
35
+ "eval_loss": 0.0446288101375103,
36
+ "eval_runtime": 21.2102,
37
+ "eval_samples_per_second": 0.33,
38
+ "eval_steps_per_second": 0.33,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 2.1818181818181817,
43
+ "grad_norm": 2.1915783882141113,
44
  "learning_rate": 0.0002,
45
+ "loss": 0.0591,
46
  "step": 30
47
  },
48
  {
49
  "epoch": 2.1818181818181817,
50
+ "eval_loss": 0.03865480050444603,
51
+ "eval_runtime": 21.2826,
52
  "eval_samples_per_second": 0.329,
53
  "eval_steps_per_second": 0.329,
54
  "step": 30
55
+ },
56
+ {
57
+ "epoch": 2.8363636363636364,
58
+ "step": 39,
59
+ "total_flos": 3.417506073386496e+16,
60
+ "train_loss": 0.05125234333368448,
61
+ "train_runtime": 1358.5629,
62
+ "train_samples_per_second": 0.243,
63
+ "train_steps_per_second": 0.029
64
  }
65
  ],
66
  "logging_steps": 10,