warmestman
committed on
Training in progress, step 10000, checkpoint
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4d7f288875c8726d4f74614c4b4d578bf9f6c37fad3979dd0d09151a806084ec
 size 4993448880
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:52fbb2dcbc66d7c4ed8d2f213452b756beef85e83d9865c31ecac6cbf30b67a6
 size 1180663192
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:931c79968a2b40db2a2f1274cfd99c95dae4c27c65afcce2ab924300046d3108
 size 3095446256
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:233fa719614ba3255a065a1e0c9c0afee77a964a4cb4878352b10cf17f61bece
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:11d4981b9ead74252fd412088033260fc7419a972daef228681b9d5d3c51ee44
 size 1064
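The files above are stored through Git LFS, so each diff only touches the three-line pointer (version, oid, size), not the multi-gigabyte payload; `git lfs` verifies the payload automatically on checkout. As a minimal illustrative sketch (not part of this commit), one could also check a downloaded shard against the sha256 and size recorded in its pointer, using the values from the first model shard's diff:

```python
import hashlib
from pathlib import Path


def matches_lfs_pointer(data_path: str, expected_sha256: str, expected_size: int) -> bool:
    """Compare a downloaded file against the oid/size fields of its Git LFS pointer."""
    path = Path(data_path)
    if path.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with path.open("rb") as fh:
        # Hash in 1 MiB chunks so multi-GB shards never have to fit in memory.
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_sha256


# Expected values copied from the pointer diff for model-00001-of-00002.safetensors.
ok = matches_lfs_pointer(
    "last-checkpoint/model-00001-of-00002.safetensors",
    "4d7f288875c8726d4f74614c4b4d578bf9f6c37fad3979dd0d09151a806084ec",
    4993448880,
)
print("pointer matches file:", ok)
```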
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 41.91738628238271,
   "best_model_checkpoint": "warmestman/whisper-large-v3-mn-cv-fleurs/checkpoint-6000",
-  "epoch":
+  "epoch": 59.880239520958085,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 10000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2248,6 +2248,255 @@
       "eval_steps_per_second": 0.089,
       "eval_wer": 42.20889940047302,
       "step": 9000
+    },
+    {
+      "epoch": 54.04,
+      "learning_rate": 5.629230769230769e-07,
+      "loss": 0.0003,
+      "step": 9025
+    },
+    {
+      "epoch": 54.19,
+      "learning_rate": 5.616410256410256e-07,
+      "loss": 0.0003,
+      "step": 9050
+    },
+    {
+      "epoch": 54.34,
+      "learning_rate": 5.603589743589743e-07,
+      "loss": 0.0003,
+      "step": 9075
+    },
+    {
+      "epoch": 54.49,
+      "learning_rate": 5.59076923076923e-07,
+      "loss": 0.0003,
+      "step": 9100
+    },
+    {
+      "epoch": 54.64,
+      "learning_rate": 5.577948717948717e-07,
+      "loss": 0.0003,
+      "step": 9125
+    },
+    {
+      "epoch": 54.79,
+      "learning_rate": 5.565128205128204e-07,
+      "loss": 0.0003,
+      "step": 9150
+    },
+    {
+      "epoch": 54.94,
+      "learning_rate": 5.552307692307692e-07,
+      "loss": 0.0003,
+      "step": 9175
+    },
+    {
+      "epoch": 55.09,
+      "learning_rate": 5.539487179487179e-07,
+      "loss": 0.0002,
+      "step": 9200
+    },
+    {
+      "epoch": 55.24,
+      "learning_rate": 5.526666666666666e-07,
+      "loss": 0.0002,
+      "step": 9225
+    },
+    {
+      "epoch": 55.39,
+      "learning_rate": 5.513846153846153e-07,
+      "loss": 0.0002,
+      "step": 9250
+    },
+    {
+      "epoch": 55.54,
+      "learning_rate": 5.501025641025641e-07,
+      "loss": 0.0002,
+      "step": 9275
+    },
+    {
+      "epoch": 55.69,
+      "learning_rate": 5.488205128205128e-07,
+      "loss": 0.0003,
+      "step": 9300
+    },
+    {
+      "epoch": 55.84,
+      "learning_rate": 5.475384615384615e-07,
+      "loss": 0.0002,
+      "step": 9325
+    },
+    {
+      "epoch": 55.99,
+      "learning_rate": 5.462564102564102e-07,
+      "loss": 0.0002,
+      "step": 9350
+    },
+    {
+      "epoch": 56.14,
+      "learning_rate": 5.44974358974359e-07,
+      "loss": 0.0002,
+      "step": 9375
+    },
+    {
+      "epoch": 56.29,
+      "learning_rate": 5.436923076923077e-07,
+      "loss": 0.0002,
+      "step": 9400
+    },
+    {
+      "epoch": 56.44,
+      "learning_rate": 5.424102564102564e-07,
+      "loss": 0.0002,
+      "step": 9425
+    },
+    {
+      "epoch": 56.59,
+      "learning_rate": 5.411282051282051e-07,
+      "loss": 0.0002,
+      "step": 9450
+    },
+    {
+      "epoch": 56.74,
+      "learning_rate": 5.398461538461539e-07,
+      "loss": 0.0002,
+      "step": 9475
+    },
+    {
+      "epoch": 56.89,
+      "learning_rate": 5.385641025641026e-07,
+      "loss": 0.0002,
+      "step": 9500
+    },
+    {
+      "epoch": 57.04,
+      "learning_rate": 5.372820512820513e-07,
+      "loss": 0.0002,
+      "step": 9525
+    },
+    {
+      "epoch": 57.19,
+      "learning_rate": 5.36e-07,
+      "loss": 0.0002,
+      "step": 9550
+    },
+    {
+      "epoch": 57.34,
+      "learning_rate": 5.347179487179488e-07,
+      "loss": 0.0002,
+      "step": 9575
+    },
+    {
+      "epoch": 57.49,
+      "learning_rate": 5.334358974358975e-07,
+      "loss": 0.0002,
+      "step": 9600
+    },
+    {
+      "epoch": 57.63,
+      "learning_rate": 5.321538461538462e-07,
+      "loss": 0.0002,
+      "step": 9625
+    },
+    {
+      "epoch": 57.78,
+      "learning_rate": 5.308717948717949e-07,
+      "loss": 0.0002,
+      "step": 9650
+    },
+    {
+      "epoch": 57.93,
+      "learning_rate": 5.295897435897437e-07,
+      "loss": 0.0002,
+      "step": 9675
+    },
+    {
+      "epoch": 58.08,
+      "learning_rate": 5.283076923076923e-07,
+      "loss": 0.0002,
+      "step": 9700
+    },
+    {
+      "epoch": 58.23,
+      "learning_rate": 5.27025641025641e-07,
+      "loss": 0.0002,
+      "step": 9725
+    },
+    {
+      "epoch": 58.38,
+      "learning_rate": 5.257435897435897e-07,
+      "loss": 0.0002,
+      "step": 9750
+    },
+    {
+      "epoch": 58.53,
+      "learning_rate": 5.244615384615385e-07,
+      "loss": 0.0002,
+      "step": 9775
+    },
+    {
+      "epoch": 58.68,
+      "learning_rate": 5.231794871794871e-07,
+      "loss": 0.0002,
+      "step": 9800
+    },
+    {
+      "epoch": 58.83,
+      "learning_rate": 5.218974358974358e-07,
+      "loss": 0.0002,
+      "step": 9825
+    },
+    {
+      "epoch": 58.98,
+      "learning_rate": 5.206153846153845e-07,
+      "loss": 0.0002,
+      "step": 9850
+    },
+    {
+      "epoch": 59.13,
+      "learning_rate": 5.193333333333332e-07,
+      "loss": 0.0002,
+      "step": 9875
+    },
+    {
+      "epoch": 59.28,
+      "learning_rate": 5.18051282051282e-07,
+      "loss": 0.0002,
+      "step": 9900
+    },
+    {
+      "epoch": 59.43,
+      "learning_rate": 5.167692307692307e-07,
+      "loss": 0.0002,
+      "step": 9925
+    },
+    {
+      "epoch": 59.58,
+      "learning_rate": 5.154871794871794e-07,
+      "loss": 0.0002,
+      "step": 9950
+    },
+    {
+      "epoch": 59.73,
+      "learning_rate": 5.142051282051281e-07,
+      "loss": 0.0002,
+      "step": 9975
+    },
+    {
+      "epoch": 59.88,
+      "learning_rate": 5.129230769230769e-07,
+      "loss": 0.0002,
+      "step": 10000
+    },
+    {
+      "epoch": 59.88,
+      "eval_loss": 0.7664361596107483,
+      "eval_runtime": 596.9914,
+      "eval_samples_per_second": 0.702,
+      "eval_steps_per_second": 0.089,
+      "eval_wer": 42.69842142896431,
+      "step": 10000
     }
   ],
   "logging_steps": 25,
@@ -2255,7 +2504,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 120,
   "save_steps": 1000,
-  "total_flos":
+  "total_flos": 5.429983718493389e+20,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
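The appended log entries show a slowly decaying learning rate (about 1.28e-09 lower every 25 logged steps) and a step-10000 evaluation with eval_wer 42.70, while best_metric stays at 41.92 from checkpoint-6000. Below is a minimal sketch, not part of this commit, of how one might read those fields back out of the checkpoint; it assumes the standard trainer_state.json keys written by the Hugging Face Trainer (best_metric, best_model_checkpoint, log_history).

```python
import json

# Load the trainer state saved alongside the checkpoint.
with open("last-checkpoint/trainer_state.json") as fh:
    state = json.load(fh)

# Entries containing "eval_wer" are evaluation records; the others are training logs.
eval_records = [entry for entry in state["log_history"] if "eval_wer" in entry]
latest = max(eval_records, key=lambda entry: entry["step"])

print(f"best WER so far: {state['best_metric']:.2f} ({state['best_model_checkpoint']})")
print(f"step {latest['step']}: eval_wer={latest['eval_wer']:.2f}, eval_loss={latest['eval_loss']:.4f}")
```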