Bisher committed on
Commit
c45c7e3
·
verified ·
1 Parent(s): 4357cd1

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +8 -0
  2. train_results.json +8 -0
  3. trainer_state.json +179 -0
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.6899445915222168,
5
+ "train_runtime": 172.2338,
6
+ "train_samples_per_second": 0.581,
7
+ "train_steps_per_second": 0.145
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.6899445915222168,
5
+ "train_runtime": 172.2338,
6
+ "train_samples_per_second": 0.581,
7
+ "train_steps_per_second": 0.145
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.24,
5
+ "eval_steps": 1,
6
+ "global_step": 6,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04,
13
+ "grad_norm": 2.9803988933563232,
14
+ "learning_rate": 6e-06,
15
+ "loss": 0.6695,
16
+ "step": 1
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "eval_EER": 0.6910112359540398,
21
+ "eval_FN": 10,
22
+ "eval_FP": 3,
23
+ "eval_TN": 175,
24
+ "eval_TP": 12,
25
+ "eval_accuracy": 0.935,
26
+ "eval_auc_roc": 0.20658835546475995,
27
+ "eval_f1": 0.9294780731144366,
28
+ "eval_loss": 0.6780480146408081,
29
+ "eval_min_tDCF": 0.05,
30
+ "eval_precision": 0.9298918918918919,
31
+ "eval_recall": 0.935,
32
+ "eval_runtime": 66.8831,
33
+ "eval_samples_per_second": 2.99,
34
+ "eval_steps_per_second": 2.99,
35
+ "step": 1
36
+ },
37
+ {
38
+ "epoch": 0.08,
39
+ "grad_norm": 3.574291467666626,
40
+ "learning_rate": 1.2e-05,
41
+ "loss": 0.6494,
42
+ "step": 2
43
+ },
44
+ {
45
+ "epoch": 0.08,
46
+ "eval_EER": 0.5,
47
+ "eval_FN": 10,
48
+ "eval_FP": 5,
49
+ "eval_TN": 173,
50
+ "eval_TP": 12,
51
+ "eval_accuracy": 0.925,
52
+ "eval_auc_roc": 0.5334525025536261,
53
+ "eval_f1": 0.9207116982740251,
54
+ "eval_loss": 0.6533163189888,
55
+ "eval_min_tDCF": 0.03488253319713994,
56
+ "eval_precision": 0.9190131790421087,
57
+ "eval_recall": 0.925,
58
+ "eval_runtime": 66.2156,
59
+ "eval_samples_per_second": 3.02,
60
+ "eval_steps_per_second": 3.02,
61
+ "step": 2
62
+ },
63
+ {
64
+ "epoch": 0.12,
65
+ "grad_norm": 3.4800655841827393,
66
+ "learning_rate": 1.8e-05,
67
+ "loss": 0.6076,
68
+ "step": 3
69
+ },
70
+ {
71
+ "epoch": 0.12,
72
+ "eval_EER": 0.45454545454545453,
73
+ "eval_FN": 9,
74
+ "eval_FP": 5,
75
+ "eval_TN": 173,
76
+ "eval_TP": 13,
77
+ "eval_accuracy": 0.93,
78
+ "eval_auc_roc": 0.5888661899897855,
79
+ "eval_f1": 0.9268888888888889,
80
+ "eval_loss": 0.6053206920623779,
81
+ "eval_min_tDCF": 0.03873850868232891,
82
+ "eval_precision": 0.9254334554334553,
83
+ "eval_recall": 0.93,
84
+ "eval_runtime": 66.8065,
85
+ "eval_samples_per_second": 2.994,
86
+ "eval_steps_per_second": 2.994,
87
+ "step": 3
88
+ },
89
+ {
90
+ "epoch": 0.16,
91
+ "grad_norm": 3.8723998069763184,
92
+ "learning_rate": 2.4e-05,
93
+ "loss": 0.6136,
94
+ "step": 4
95
+ },
96
+ {
97
+ "epoch": 0.16,
98
+ "eval_EER": 0.2727272727273082,
99
+ "eval_FN": 6,
100
+ "eval_FP": 5,
101
+ "eval_TN": 173,
102
+ "eval_TP": 16,
103
+ "eval_accuracy": 0.945,
104
+ "eval_auc_roc": 0.7681307456588355,
105
+ "eval_f1": 0.9444374959286042,
106
+ "eval_loss": 0.5403749346733093,
107
+ "eval_min_tDCF": 0.03794688457609806,
108
+ "eval_precision": 0.943977121574887,
109
+ "eval_recall": 0.945,
110
+ "eval_runtime": 65.9313,
111
+ "eval_samples_per_second": 3.033,
112
+ "eval_steps_per_second": 3.033,
113
+ "step": 4
114
+ },
115
+ {
116
+ "epoch": 0.2,
117
+ "grad_norm": 3.4409637451171875,
118
+ "learning_rate": 3e-05,
119
+ "loss": 0.537,
120
+ "step": 5
121
+ },
122
+ {
123
+ "epoch": 0.2,
124
+ "eval_EER": 0.1573033707865425,
125
+ "eval_FN": 6,
126
+ "eval_FP": 5,
127
+ "eval_TN": 173,
128
+ "eval_TP": 16,
129
+ "eval_accuracy": 0.945,
130
+ "eval_auc_roc": 0.8600612870275791,
131
+ "eval_f1": 0.9444374959286042,
132
+ "eval_loss": 0.46759718656539917,
133
+ "eval_min_tDCF": 0.03636363636363637,
134
+ "eval_precision": 0.943977121574887,
135
+ "eval_recall": 0.945,
136
+ "eval_runtime": 66.2088,
137
+ "eval_samples_per_second": 3.021,
138
+ "eval_steps_per_second": 3.021,
139
+ "step": 5
140
+ },
141
+ {
142
+ "epoch": 0.24,
143
+ "grad_norm": 10.769447326660156,
144
+ "learning_rate": 2.9333333333333333e-05,
145
+ "loss": 0.4852,
146
+ "step": 6
147
+ }
148
+ ],
149
+ "logging_steps": 1,
150
+ "max_steps": 50,
151
+ "num_input_tokens_seen": 0,
152
+ "num_train_epochs": 2,
153
+ "save_steps": 50,
154
+ "stateful_callbacks": {
155
+ "EarlyStoppingCallback": {
156
+ "args": {
157
+ "early_stopping_patience": 2,
158
+ "early_stopping_threshold": 0.0
159
+ },
160
+ "attributes": {
161
+ "early_stopping_patience_counter": 0
162
+ }
163
+ },
164
+ "TrainerControl": {
165
+ "args": {
166
+ "should_epoch_stop": false,
167
+ "should_evaluate": false,
168
+ "should_log": false,
169
+ "should_save": false,
170
+ "should_training_stop": false
171
+ },
172
+ "attributes": {}
173
+ }
174
+ },
175
+ "total_flos": 0.0,
176
+ "train_batch_size": 1,
177
+ "trial_name": null,
178
+ "trial_params": null
179
+ }